Hi @danking!
That’s correct — I’m able to write the matrix table, but I can’t read it back and then export it.
I initialize `tmp_dir` like so:
my_database = dxpy.find_one_data_object(
name="my_database",
project=dxpy.find_one_project()["id"]
)["id"]
database_dir = f'dnax://{my_database}'
sc = pyspark.SparkContext()
spark = pyspark.sql.SparkSession(sc)
hl.init(sc=sc, tmp_dir=f'{database_dir}/tmp/')
Here is the full stack trace when I try to export:
2022-09-26 16:13:17 Hail: WARN: export_vcf: ignored the following fields:
'gnomad_info' (row)
2022-09-26 16:13:17 Hail: WARN: export_vcf found no row field 'info'. Emitting no INFO fields.
2022-09-26 16:13:38 Hail: INFO: merging 84 files totalling 42.7M...
---------------------------------------------------------------------------
FatalError Traceback (most recent call last)
<ipython-input-11-b35a4d96d3d7> in <module>
14 hl.export_vcf(
15 dataset = mt_tmp,
---> 16 output = get_final_filter_bfile_local_path_prefix(chrom=chrom)+'-tmp',
17 )
<decorator-gen-1449> in export_vcf(dataset, output, append_to_header, parallel, metadata, tabix)
/opt/conda/lib/python3.6/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
575 def wrapper(__original_func, *args, **kwargs):
576 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 577 return __original_func(*args_, **kwargs_)
578
579 return wrapper
/opt/conda/lib/python3.6/site-packages/hail/methods/impex.py in export_vcf(dataset, output, append_to_header, parallel, metadata, tabix)
538 metadata,
539 tabix)
--> 540 Env.backend().execute(ir.MatrixWrite(dataset._mir, writer))
541
542
/opt/conda/lib/python3.6/site-packages/hail/backend/py4j_backend.py in execute(self, ir, timed)
108 raise HailUserError(message_and_trace) from None
109
--> 110 raise e
/opt/conda/lib/python3.6/site-packages/hail/backend/py4j_backend.py in execute(self, ir, timed)
84 # print(self._hail_package.expr.ir.Pretty.apply(jir, True, -1))
85 try:
---> 86 result_tuple = self._jhc.backend().executeEncode(jir, stream_codec)
87 (result, timings) = (result_tuple._1(), result_tuple._2())
88 value = ir.typ._from_encoding(result)
/cluster/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
/opt/conda/lib/python3.6/site-packages/hail/backend/py4j_backend.py in deco(*args, **kwargs)
29 raise FatalError('%s\n\nJava stack trace:\n%s\n'
30 'Hail version: %s\n'
---> 31 'Error summary: %s' % (deepest, full, hail.__version__, deepest), error_id) from None
32 except pyspark.sql.utils.CapturedException as e:
33 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: IOException: Server returned HTTP response code: 416 for URL: http://10.0.3.1:8090/DB/R/database-GBvvVKjJg8Jy9gvq4q6q2kB6/tmp/write-table-concatenated-lJSUi4qwqoXAuNkOuqQS8Y/part-00001
Java stack trace:
java.io.IOException: Server returned HTTP response code: 416 for URL: http://10.0.3.1:8090/DB/R/database-GBvvVKjJg8Jy9gvq4q6q2kB6/tmp/write-table-concatenated-lJSUi4qwqoXAuNkOuqQS8Y/part-00001
at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1897)
at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1495)
at com.dnanexus.hadoop.fs.DNAxFileSystem.getConnectionInputStream(DNAxFileSystem.java:710)
at com.dnanexus.hadoop.fs.DNAxFileSystem.open(DNAxFileSystem.java:604)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:769)
at is.hail.io.fs.HadoopFS.openNoCompression(HadoopFS.scala:83)
at is.hail.io.fs.FS$$anonfun$copyMergeList$2.apply(FS.scala:301)
at is.hail.io.fs.FS$$anonfun$copyMergeList$2.apply(FS.scala:292)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.io.fs.FS$class.copyMergeList(FS.scala:292)
at is.hail.io.fs.HadoopFS.copyMergeList(HadoopFS.scala:70)
at is.hail.io.fs.FS$$anonfun$1.apply$mcV$sp(FS.scala:272)
at is.hail.io.fs.FS$$anonfun$1.apply(FS.scala:272)
at is.hail.io.fs.FS$$anonfun$1.apply(FS.scala:272)
at is.hail.utils.package$.time(package.scala:159)
at is.hail.io.fs.FS$class.copyMerge(FS.scala:271)
at is.hail.io.fs.HadoopFS.copyMerge(HadoopFS.scala:70)
at is.hail.utils.richUtils.RichRDD$.writeTable$extension(RichRDD.scala:118)
at is.hail.io.vcf.ExportVCF$.apply(ExportVCF.scala:460)
at is.hail.expr.ir.MatrixVCFWriter.apply(MatrixWriter.scala:333)
at is.hail.expr.ir.WrappedMatrixWriter.apply(MatrixWriter.scala:46)
at is.hail.expr.ir.Interpret$.run(Interpret.scala:852)
at is.hail.expr.ir.Interpret$.alreadyLowered(Interpret.scala:57)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.evaluate$1(LowerOrInterpretNonCompilable.scala:20)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.is$hail$expr$ir$LowerOrInterpretNonCompilable$$rewrite$1(LowerOrInterpretNonCompilable.scala:67)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.apply(LowerOrInterpretNonCompilable.scala:72)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.transform(LoweringPass.scala:69)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:16)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:16)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:16)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:14)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass$class.apply(LoweringPass.scala:14)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.apply(LoweringPass.scala:64)
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:15)
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:13)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:13)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:47)
at is.hail.backend.spark.SparkBackend.is$hail$backend$spark$SparkBackend$$_execute(SparkBackend.scala:381)
at is.hail.backend.spark.SparkBackend$$anonfun$8$$anonfun$apply$4.apply(SparkBackend.scala:417)
at is.hail.backend.spark.SparkBackend$$anonfun$8$$anonfun$apply$4.apply(SparkBackend.scala:414)
at is.hail.backend.ExecuteContext$$anonfun$scoped$1$$anonfun$apply$1.apply(ExecuteContext.scala:47)
at is.hail.backend.ExecuteContext$$anonfun$scoped$1$$anonfun$apply$1.apply(ExecuteContext.scala:47)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.backend.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:47)
at is.hail.backend.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:46)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:46)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:275)
at is.hail.backend.spark.SparkBackend$$anonfun$8.apply(SparkBackend.scala:414)
at is.hail.backend.spark.SparkBackend$$anonfun$8.apply(SparkBackend.scala:413)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.backend.spark.SparkBackend.executeEncode(SparkBackend.scala:413)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Hail version: 0.2.78-b17627756568
Error summary: IOException: Server returned HTTP response code: 416 for URL: http://10.0.3.1:8090/DB/R/database-GBvvVKjJg8Jy9gvq4q6q2kB6/tmp/write-table-concatenated-lJSUi4qwqoXAuNkOuqQS8Y/part-00001