Hello
I am running into trouble with a script that calls `hl.export_bgen`: it used to work, but it now throws the error below. It does write a .bgen file, but no .sample file. As far as I can tell, the MatrixTable itself looks fine.
---------------------------------------------------------------------------
FatalError Traceback (most recent call last)
<ipython-input-40-8e317fe0472b> in <module>
1 # BGEN
----> 2 write_bgen(mt, "file:" + BGEN_FILE)
/opt/notebooks/gogoGPCR/src/matrixtables.py in write_bgen(mt, output)
272
273 hl.export_bgen(
--> 274 mt=mt, varid=mt.varid, rsid=mt.varid, gp=mt.GP, output=output
275 )
276
<decorator-gen-2075> in export_bgen(mt, output, gp, varid, rsid, parallel)
/opt/conda/lib/python3.6/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
612 def wrapper(__original_func, *args, **kwargs):
613 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614 return __original_func(*args_, **kwargs_)
615
616 return wrapper
/opt/conda/lib/python3.6/site-packages/hail/methods/impex.py in export_bgen(mt, output, gp, varid, rsid, parallel)
237 Env.backend().execute(ir.MatrixWrite(mt._mir, ir.MatrixBGENWriter(
238 output,
--> 239 parallel)))
240
241
/opt/conda/lib/python3.6/site-packages/hail/backend/py4j_backend.py in execute(self, ir, timed)
96 raise HailUserError(message_and_trace) from None
97
---> 98 raise e
/opt/conda/lib/python3.6/site-packages/hail/backend/py4j_backend.py in execute(self, ir, timed)
72 # print(self._hail_package.expr.ir.Pretty.apply(jir, True, -1))
73 try:
---> 74 result = json.loads(self._jhc.backend().executeJSON(jir))
75 value = ir.typ._from_json(result['value'])
76 timings = result['timings']
/cluster/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
/opt/conda/lib/python3.6/site-packages/hail/backend/py4j_backend.py in deco(*args, **kwargs)
30 raise FatalError('%s\n\nJava stack trace:\n%s\n'
31 'Hail version: %s\n'
---> 32 'Error summary: %s' % (deepest, full, hail.__version__, deepest), error_id) from None
33 except pyspark.sql.utils.CapturedException as e:
34 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: IllegalArgumentException: RangeEnd < RangeStart: rangeEnd = -1, rangeStart = 0, url http://10.0.3.1:8090/DB/R/database-G5B4q0jJ860jjqP2Jq6pfvF5/tmp/export-bgen-concatenated-pSOhNdh8WnCsgwyZRcOUsT/part-1-149-1-0-a1c15e85-4cc8-b5c7-98c1-49df2ce5806c
Java stack trace:
java.lang.IllegalArgumentException: RangeEnd < RangeStart: rangeEnd = -1, rangeStart = 0, url http://10.0.3.1:8090/DB/R/database-G5B4q0jJ860jjqP2Jq6pfvF5/tmp/export-bgen-concatenated-pSOhNdh8WnCsgwyZRcOUsT/part-1-149-1-0-a1c15e85-4cc8-b5c7-98c1-49df2ce5806c
at com.google.common.base.Preconditions.checkArgument(Preconditions.java:122)
at com.dnanexus.hadoop.fs.DNAxFileSystem.createConnection(DNAxFileSystem.java:751)
at com.dnanexus.hadoop.fs.DNAxFileSystem.open(DNAxFileSystem.java:608)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:769)
at is.hail.io.fs.HadoopFS.openNoCompression(HadoopFS.scala:83)
at is.hail.io.fs.FS$class.open(FS.scala:139)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:70)
at is.hail.io.fs.FS$class.open(FS.scala:148)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:70)
at is.hail.io.gen.ExportBGEN$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$4.apply(ExportBGEN.scala:377)
at is.hail.io.gen.ExportBGEN$$anonfun$1$$anonfun$apply$mcV$sp$1$$anonfun$apply$4.apply(ExportBGEN.scala:376)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
at is.hail.io.gen.ExportBGEN$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ExportBGEN.scala:376)
at is.hail.io.gen.ExportBGEN$$anonfun$1$$anonfun$apply$mcV$sp$1.apply(ExportBGEN.scala:372)
at is.hail.utils.package$.using(package.scala:618)
at is.hail.io.gen.ExportBGEN$$anonfun$1.apply$mcV$sp(ExportBGEN.scala:372)
at is.hail.io.gen.ExportBGEN$$anonfun$1.apply(ExportBGEN.scala:372)
at is.hail.io.gen.ExportBGEN$$anonfun$1.apply(ExportBGEN.scala:372)
at is.hail.utils.package$.time(package.scala:158)
at is.hail.io.gen.ExportBGEN$.apply(ExportBGEN.scala:371)
at is.hail.expr.ir.MatrixBGENWriter.apply(MatrixWriter.scala:335)
at is.hail.expr.ir.WrappedMatrixWriter.apply(MatrixWriter.scala:40)
at is.hail.expr.ir.Interpret$.run(Interpret.scala:825)
at is.hail.expr.ir.Interpret$.alreadyLowered(Interpret.scala:53)
at is.hail.expr.ir.InterpretNonCompilable$.interpretAndCoerce$1(InterpretNonCompilable.scala:16)
at is.hail.expr.ir.InterpretNonCompilable$.is$hail$expr$ir$InterpretNonCompilable$$rewrite$1(InterpretNonCompilable.scala:53)
at is.hail.expr.ir.InterpretNonCompilable$.apply(InterpretNonCompilable.scala:58)
at is.hail.expr.ir.lowering.InterpretNonCompilablePass$.transform(LoweringPass.scala:67)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:15)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:15)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:15)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:13)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass$class.apply(LoweringPass.scala:13)
at is.hail.expr.ir.lowering.InterpretNonCompilablePass$.apply(LoweringPass.scala:62)
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:14)
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:12)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:12)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:28)
at is.hail.backend.spark.SparkBackend.is$hail$backend$spark$SparkBackend$$_execute(SparkBackend.scala:354)
at is.hail.backend.spark.SparkBackend$$anonfun$execute$1.apply(SparkBackend.scala:338)
at is.hail.backend.spark.SparkBackend$$anonfun$execute$1.apply(SparkBackend.scala:335)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:25)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:23)
at is.hail.utils.package$.using(package.scala:618)
at is.hail.annotations.Region$.scoped(Region.scala:18)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:23)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:247)
at is.hail.backend.spark.SparkBackend.execute(SparkBackend.scala:335)
at is.hail.backend.spark.SparkBackend$$anonfun$7.apply(SparkBackend.scala:379)
at is.hail.backend.spark.SparkBackend$$anonfun$7.apply(SparkBackend.scala:377)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.backend.spark.SparkBackend.executeJSON(SparkBackend.scala:377)
at sun.reflect.GeneratedMethodAccessor52.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Hail version: 0.2.61-3c86d3ba497a
Error summary: IllegalArgumentException: RangeEnd < RangeStart: rangeEnd = -1, rangeStart = 0, url http://10.0.3.1:8090/DB/R/database-G5B4q0jJ860jjqP2Jq6pfvF5/tmp/export-bgen-concatenated-pSOhNdh8WnCsgwyZRcOUsT/part-1-149-1-0-a1c15e85-4cc8-b5c7-98c1-49df2ce5806c
The relevant part of the code in question:
def add_varid(mt: hl.MatrixTable) -> hl.MatrixTable:
    """Annotate rows with a ``varid`` string field.

    The identifier is the locus contig, the locus position (converted to a
    string), the first allele and the second allele, joined with ``":"``.
    Only ``alleles[0]`` and ``alleles[1]`` are used; any further alleles are
    not part of the identifier.

    Parameters
    ----------
    mt : hl.MatrixTable
        MatrixTable whose rows expose ``locus`` and ``alleles``.

    Returns
    -------
    hl.MatrixTable
        The result of ``mt.annotate_rows`` with the ``varid`` row field added.
    """
    locus = mt.locus
    id_parts = [
        locus.contig,
        hl.str(locus.position),
        mt.alleles[0],
        mt.alleles[1],
    ]
    return mt.annotate_rows(varid=hl.delimit(id_parts, ":"))
def recode_GT_to_GP(
    mt: hl.matrixtable.MatrixTable,
) -> hl.matrixtable.MatrixTable:
    """Annotate entries with a ``GP`` field derived from ``GT``.

    ``GP`` is looked up in a fixed 3x3 table of one-hot rows, indexed by
    ``mt.GT.n_alt_alleles()``: 0 -> [1, 0, 0], 1 -> [0, 1, 0], 2 -> [0, 0, 1].

    Parameters
    ----------
    mt : hl.matrixtable.MatrixTable
        MatrixTable with a ``GT`` entry field.

    Returns
    -------
    hl.matrixtable.MatrixTable
        The result of ``mt.annotate_entries`` with the ``GP`` entry field added.
    """
    # NOTE(review): the table has three rows, so this assumes
    # n_alt_alleles() is always 0, 1 or 2 -- confirm for the input data.
    one_hot_rows = hl.literal(
        [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]
    )
    alt_count = mt.GT.n_alt_alleles()
    return mt.annotate_entries(GP=one_hot_rows[alt_count])
def write_bgen(mt: hl.matrixtable.MatrixTable, output: str) -> None:
    """Add ``varid`` and ``GP`` to ``mt`` and export it with ``hl.export_bgen``.

    The MatrixTable is first passed through ``add_varid`` and then
    ``recode_GT_to_GP``. The ``varid`` row field is used for both the
    ``varid`` and ``rsid`` arguments of the export.

    Parameters
    ----------
    mt : hl.matrixtable.MatrixTable
        MatrixTable to export.
    output : str
        Passed unchanged as the ``output`` argument of ``hl.export_bgen``.
    """
    prepared = recode_GT_to_GP(add_varid(mt))
    hl.export_bgen(
        mt=prepared,
        output=output,
        gp=prepared.GP,
        varid=prepared.varid,
        rsid=prepared.varid,
    )
# Export `mt`; the output path is BGEN_FILE with a "file:" prefix prepended.
# NOTE(review): presumably the "file:" scheme targets the local filesystem,
# and BGEN_FILE is a str defined earlier in the notebook -- confirm.
write_bgen(mt, "file:" + BGEN_FILE)
I hope the issue is clear; let me know if you need the log or the full notebook.
Thanks a bunch!