I’m getting the error below when trying to run a script with Hail version 0.2.75-52791e9868e4, but I didn’t get this error when running the same script with Hail version 0.2.74-0c3a74d12093. Do you know why this might be different?
File "/tmp/7d4dcb21a574404199c7d4e68f9df4b0/add_annotations.py", line 2082, in <module>
main(args)
File "/tmp/7d4dcb21a574404199c7d4e68f9df4b0/add_annotations.py", line 1942, in main
mt = mt.checkpoint(
File "<decorator-gen-1255>", line 2, in checkpoint
File "/opt/conda/default/lib/python3.8/site-packages/hail/typecheck/check.py", line 577, in wrapper
return __original_func(*args_, **kwargs_)
File "/opt/conda/default/lib/python3.8/site-packages/hail/matrixtable.py", line 2487, in checkpoint
self.write(output=output, overwrite=overwrite, stage_locally=stage_locally, _codec_spec=_codec_spec)
File "<decorator-gen-1257>", line 2, in write
File "/opt/conda/default/lib/python3.8/site-packages/hail/typecheck/check.py", line 577, in wrapper
return __original_func(*args_, **kwargs_)
File "/opt/conda/default/lib/python3.8/site-packages/hail/matrixtable.py", line 2529, in write
Env.backend().execute(ir.MatrixWrite(self._mir, writer))
File "/opt/conda/default/lib/python3.8/site-packages/hail/backend/py4j_backend.py", line 98, in execute
raise e
File "/opt/conda/default/lib/python3.8/site-packages/hail/backend/py4j_backend.py", line 74, in execute
result = json.loads(self._jhc.backend().executeJSON(jir))
File "/usr/lib/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1304, in __call__
File "/opt/conda/default/lib/python3.8/site-packages/hail/backend/py4j_backend.py", line 30, in deco
raise FatalError('%s\n\nJava stack trace:\n%s\n'
hail.utils.java.FatalError: HailException: array index out of bounds: index=0, length=0```
File "/tmp/aad36b59d47d4d9aab582310e65b7df5/add_annotations.py", line 2081, in <module>
main(args)
File "/tmp/aad36b59d47d4d9aab582310e65b7df5/add_annotations.py", line 1941, in main
mt = mt.checkpoint(
File "<decorator-gen-1255>", line 2, in checkpoint
File "/opt/conda/default/lib/python3.8/site-packages/hail/typecheck/check.py", line 577, in wrapper
return __original_func(*args_, **kwargs_)
File "/opt/conda/default/lib/python3.8/site-packages/hail/matrixtable.py", line 2487, in checkpoint
self.write(output=output, overwrite=overwrite, stage_locally=stage_locally, _codec_spec=_codec_spec)
File "<decorator-gen-1257>", line 2, in write
File "/opt/conda/default/lib/python3.8/site-packages/hail/typecheck/check.py", line 577, in wrapper
return __original_func(*args_, **kwargs_)
File "/opt/conda/default/lib/python3.8/site-packages/hail/matrixtable.py", line 2529, in write
Env.backend().execute(ir.MatrixWrite(self._mir, writer))
File "/opt/conda/default/lib/python3.8/site-packages/hail/backend/py4j_backend.py", line 98, in execute
raise e
File "/opt/conda/default/lib/python3.8/site-packages/hail/backend/py4j_backend.py", line 74, in execute
result = json.loads(self._jhc.backend().executeJSON(jir))
File "/usr/lib/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1304, in __call__
File "/opt/conda/default/lib/python3.8/site-packages/hail/backend/py4j_backend.py", line 30, in deco
raise FatalError('%s\n\nJava stack trace:\n%s\n'
hail.utils.java.FatalError: HailException: array index out of bounds: index=0, length=0
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 131 in stage 80.0 failed 20 times, most recent failure: Lost task 131.19 in stage 80.0 (TID 15670) (kml-sw-h6t0.c.broad-mpg-gnomad.internal executor 27): is.hail.utils.HailException: array index out of bounds: index=0, length=0
at __C12248collect_distributed_array.__m12410arrayref_bounds_check(Unknown Source)
at __C12248collect_distributed_array.__m12259split_Let_region12_17(Unknown Source)
at __C12248collect_distributed_array.__m12259split_Let(Unknown Source)
at __C12248collect_distributed_array.apply(Unknown Source)
at __C12248collect_distributed_array.apply(Unknown Source)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$2(BackendUtils.scala:31)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:144)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$1(BackendUtils.scala:30)
at is.hail.backend.spark.SparkBackendComputeRDD.compute(SparkBackend.scala:723)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2254)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2203)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2202)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2202)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1078)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1078)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1078)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2441)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2383)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2372)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:868)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2202)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2223)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2242)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2267)
at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
at is.hail.backend.spark.SparkBackend.parallelizeAndComputeWithIndex(SparkBackend.scala:286)
at is.hail.backend.BackendUtils.collectDArray(BackendUtils.scala:28)
at __C12237Compiled.apply(Emit.scala)
at is.hail.expr.ir.LoweredTableReader$.makeCoercer(TableIR.scala:322)
at is.hail.expr.ir.GenericTableValue.getLTVCoercer(GenericTableValue.scala:131)
at is.hail.expr.ir.GenericTableValue.toTableStage(GenericTableValue.scala:156)
at is.hail.io.vcf.MatrixVCFReader.lower(LoadVCF.scala:1791)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:402)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:859)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:1102)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:957)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:859)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:957)
at is.hail.expr.ir.lowering.LowerTableIR$.$anonfun$apply$101(LowerTableIR.scala:1040)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:1040)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:480)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:986)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:859)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:477)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:859)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:563)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:859)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:477)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:859)
at is.hail.expr.ir.lowering.LowerTableIR$.apply(LowerTableIR.scala:1330)
at is.hail.expr.ir.lowering.LowerToCDA$.lower(LowerToCDA.scala:68)
at is.hail.expr.ir.lowering.LowerToCDA$.apply(LowerToCDA.scala:17)
at is.hail.expr.ir.lowering.LowerToDistributedArrayPass.transform(LoweringPass.scala:76)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.evaluate$1(LowerOrInterpretNonCompilable.scala:26)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.rewrite$1(LowerOrInterpretNonCompilable.scala:66)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.apply(LowerOrInterpretNonCompilable.scala:71)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.transform(LoweringPass.scala:68)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$3(LoweringPass.scala:15)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$1(LoweringPass.scala:15)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.apply(LoweringPass.scala:13)
at is.hail.expr.ir.lowering.LoweringPass.apply$(LoweringPass.scala:12)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.apply(LoweringPass.scala:63)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1(LoweringPipeline.scala:14)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1$adapted(LoweringPipeline.scala:12)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:38)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:12)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:46)
at is.hail.backend.spark.SparkBackend._execute(SparkBackend.scala:381)
at is.hail.backend.spark.SparkBackend.$anonfun$execute$1(SparkBackend.scala:365)
at is.hail.expr.ir.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:47)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.expr.ir.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:47)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:46)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:275)
at is.hail.backend.spark.SparkBackend.execute(SparkBackend.scala:362)
at is.hail.backend.spark.SparkBackend.$anonfun$executeJSON$1(SparkBackend.scala:406)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.backend.spark.SparkBackend.executeJSON(SparkBackend.scala:404)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
is.hail.utils.HailException: array index out of bounds: index=0, length=0
at __C12248collect_distributed_array.__m12410arrayref_bounds_check(Unknown Source)
at __C12248collect_distributed_array.__m12259split_Let_region12_17(Unknown Source)
at __C12248collect_distributed_array.__m12259split_Let(Unknown Source)
at __C12248collect_distributed_array.apply(Unknown Source)
at __C12248collect_distributed_array.apply(Unknown Source)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$2(BackendUtils.scala:31)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:144)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$1(BackendUtils.scala:30)
at is.hail.backend.spark.SparkBackendComputeRDD.compute(SparkBackend.scala:723)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Hail version: 0.2.75-52791e9868e4
Error summary: HailException: array index out of bounds: index=0, length=0
Ok, I can make you a custom version of Hail; you run that custom version, it’ll give you a better error message to send to us, and then we’ll be able to figure out what’s going wrong. In the meantime, if you call hl._set_flags(no_whole_stage_codegen='1') at the beginning of your script, it’ll temporarily work around the issue.
Once you install that wheel, you’ll have a custom version of Hail installed. Then run your pipeline again (without the hl._set_flags call) and send us the full error message from that, and we’ll be able to debug the problem you’re running into.
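For reference, a minimal sketch of where the flag call goes, assuming a standard hl.init() setup; the input and checkpoint paths are placeholders, not from the original script:

```python
import hail as hl

hl.init()

# Temporary workaround suggested above: disable whole-stage code generation
# before any queries run.
hl._set_flags(no_whole_stage_codegen='1')

# Hypothetical pipeline; substitute your own input and output paths.
mt = hl.read_matrix_table('gs://my-bucket/dataset.mt')
mt = mt.checkpoint('gs://my-bucket/checkpoints/annotated.mt', overwrite=True)
```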
Adding hl._set_flags(no_whole_stage_codegen='1') got rid of the array-index-out-of-bounds error I was getting when running the script on a small test dataset. However, now that I’m trying to run it with more samples, I’m getting a ClassTooLargeException:
File "/tmp/be7f6534b3f34d7f932c047fe8ea043c/add_annotations.py", line 2083, in <module>
main(args)
File "/tmp/be7f6534b3f34d7f932c047fe8ea043c/add_annotations.py", line 1952, in main
mt = mt.checkpoint(
File "<decorator-gen-1255>", line 2, in checkpoint
File "/opt/conda/default/lib/python3.8/site-packages/hail/typecheck/check.py", line 577, in wrapper
return __original_func(*args_, **kwargs_)
File "/opt/conda/default/lib/python3.8/site-packages/hail/matrixtable.py", line 2487, in checkpoint
self.write(output=output, overwrite=overwrite, stage_locally=stage_locally, _codec_spec=_codec_spec)
File "<decorator-gen-1257>", line 2, in write
File "/opt/conda/default/lib/python3.8/site-packages/hail/typecheck/check.py", line 577, in wrapper
return __original_func(*args_, **kwargs_)
File "/opt/conda/default/lib/python3.8/site-packages/hail/matrixtable.py", line 2529, in write
Env.backend().execute(ir.MatrixWrite(self._mir, writer))
File "/opt/conda/default/lib/python3.8/site-packages/hail/backend/py4j_backend.py", line 98, in execute
raise e
File "/opt/conda/default/lib/python3.8/site-packages/hail/backend/py4j_backend.py", line 74, in execute
result = json.loads(self._jhc.backend().executeJSON(jir))
File "/usr/lib/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1304, in __call__
File "/opt/conda/default/lib/python3.8/site-packages/hail/backend/py4j_backend.py", line 30, in deco
raise FatalError('%s\n\nJava stack trace:\n%s\n'
hail.utils.java.FatalError: ClassTooLargeException: Class too large: __C18464Compiled
Java stack trace:
is.hail.relocated.org.objectweb.asm.ClassTooLargeException: Class too large: __C18464Compiled
at is.hail.relocated.org.objectweb.asm.ClassWriter.toByteArray(ClassWriter.java:599)
at is.hail.lir.Emit$.apply(Emit.scala:217)
at is.hail.lir.Classx.$anonfun$asBytes$4(X.scala:110)
at scala.collection.Iterator$$anon$10.next(Iterator.scala:461)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49)
at scala.collection.TraversableOnce.to(TraversableOnce.scala:366)
at scala.collection.TraversableOnce.to$(TraversableOnce.scala:364)
at scala.collection.AbstractIterator.to(Iterator.scala:1431)
at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:358)
at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:358)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1431)
at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:345)
at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:339)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1431)
at is.hail.lir.Classx.asBytes(X.scala:123)
at is.hail.asm4s.ClassBuilder.classBytes(ClassBuilder.scala:351)
at is.hail.asm4s.ModuleBuilder.$anonfun$classesBytes$1(ClassBuilder.scala:152)
at is.hail.asm4s.ModuleBuilder.$anonfun$classesBytes$1$adapted(ClassBuilder.scala:152)
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49)
at scala.collection.TraversableOnce.to(TraversableOnce.scala:366)
at scala.collection.TraversableOnce.to$(TraversableOnce.scala:364)
at scala.collection.AbstractIterator.to(Iterator.scala:1431)
at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:358)
at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:358)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1431)
at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:345)
at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:339)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1431)
at is.hail.asm4s.ModuleBuilder.classesBytes(ClassBuilder.scala:153)
at is.hail.expr.ir.EmitClassBuilder.resultWithIndex(EmitClassBuilder.scala:660)
at is.hail.expr.ir.WrappedEmitClassBuilder.resultWithIndex(EmitClassBuilder.scala:154)
at is.hail.expr.ir.WrappedEmitClassBuilder.resultWithIndex$(EmitClassBuilder.scala:154)
at is.hail.expr.ir.EmitFunctionBuilder.resultWithIndex(EmitClassBuilder.scala:1052)
at is.hail.expr.ir.Compile$.apply(Compile.scala:78)
at is.hail.expr.ir.TableMapRows.execute(TableIR.scala:2008)
at is.hail.expr.ir.TableMapGlobals.execute(TableIR.scala:2315)
at is.hail.expr.ir.TableMapRows.execute(TableIR.scala:2002)
at is.hail.expr.ir.TableMapGlobals.execute(TableIR.scala:2315)
at is.hail.expr.ir.TableIR.analyzeAndExecute(TableIR.scala:57)
at is.hail.expr.ir.Interpret$.run(Interpret.scala:851)
at is.hail.expr.ir.Interpret$.alreadyLowered(Interpret.scala:56)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.evaluate$1(LowerOrInterpretNonCompilable.scala:19)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.rewrite$1(LowerOrInterpretNonCompilable.scala:66)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.apply(LowerOrInterpretNonCompilable.scala:71)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.transform(LoweringPass.scala:68)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$3(LoweringPass.scala:15)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$1(LoweringPass.scala:15)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.apply(LoweringPass.scala:13)
at is.hail.expr.ir.lowering.LoweringPass.apply$(LoweringPass.scala:12)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.apply(LoweringPass.scala:63)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1(LoweringPipeline.scala:14)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1$adapted(LoweringPipeline.scala:12)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:38)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:12)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:46)
at is.hail.backend.spark.SparkBackend._execute(SparkBackend.scala:381)
at is.hail.backend.spark.SparkBackend.$anonfun$execute$1(SparkBackend.scala:365)
at is.hail.expr.ir.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:47)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.expr.ir.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:47)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:46)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:275)
at is.hail.backend.spark.SparkBackend.execute(SparkBackend.scala:362)
at is.hail.backend.spark.SparkBackend.$anonfun$executeJSON$1(SparkBackend.scala:406)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.backend.spark.SparkBackend.executeJSON(SparkBackend.scala:404)
at sun.reflect.GeneratedMethodAccessor132.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Hail version: 0.2.75-52791e9868e4
Error summary: ClassTooLargeException: Class too large: __C18464Compiled
As for the ClassTooLargeException: to clarify, did you get that with regular Hail 0.2.75, or with the debug Hail wheel that I posted? I expect my debug branch not to scale well because of the extra debug information stored in it.
I just tried running 0.2.76 (without hl._set_flags(no_whole_stage_codegen='1')) on the small test dataset and get this:
File "/tmp/94b67211161a4b668e3383365cb31e91/add_annotations.py", line 2083, in <module>
main(args)
File "/tmp/94b67211161a4b668e3383365cb31e91/add_annotations.py", line 1952, in main
mt = mt.checkpoint(
File "<decorator-gen-1255>", line 2, in checkpoint
File "/opt/conda/default/lib/python3.8/site-packages/hail/typecheck/check.py", line 577, in wrapper
return __original_func(*args_, **kwargs_)
File "/opt/conda/default/lib/python3.8/site-packages/hail/matrixtable.py", line 2487, in checkpoint
self.write(output=output, overwrite=overwrite, stage_locally=stage_locally, _codec_spec=_codec_spec)
File "<decorator-gen-1257>", line 2, in write
File "/opt/conda/default/lib/python3.8/site-packages/hail/typecheck/check.py", line 577, in wrapper
return __original_func(*args_, **kwargs_)
File "/opt/conda/default/lib/python3.8/site-packages/hail/matrixtable.py", line 2529, in write
Env.backend().execute(ir.MatrixWrite(self._mir, writer))
File "/opt/conda/default/lib/python3.8/site-packages/hail/backend/py4j_backend.py", line 98, in execute
raise e
File "/opt/conda/default/lib/python3.8/site-packages/hail/backend/py4j_backend.py", line 74, in execute
result = json.loads(self._jhc.backend().executeJSON(jir))
File "/usr/lib/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1304, in __call__
File "/opt/conda/default/lib/python3.8/site-packages/hail/backend/py4j_backend.py", line 30, in deco
raise FatalError('%s\n\nJava stack trace:\n%s\n'
hail.utils.java.FatalError: ClassFormatError: Too many arguments in method signature in class file __C21837collect_distributed_array
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 12 in stage 93.0 failed 20 times, most recent failure: Lost task 12.19 in stage 93.0 (TID 15689) (kml-sw-jj2p.c.broad-mpg-gnomad.internal executor 63): java.lang.ClassFormatError: Too many arguments in method signature in class file __C21837collect_distributed_array
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:756)
at java.lang.ClassLoader.defineClass(ClassLoader.java:635)
at is.hail.asm4s.package$HailClassLoader$.liftedTree1$1(package.scala:253)
at is.hail.asm4s.package$HailClassLoader$.loadOrDefineClass(package.scala:249)
at is.hail.asm4s.ClassesBytes.$anonfun$load$1(ClassBuilder.scala:65)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
at is.hail.asm4s.ClassesBytes.load(ClassBuilder.scala:63)
at is.hail.expr.ir.EmitClassBuilder$$anon$1.apply(EmitClassBuilder.scala:669)
at is.hail.expr.ir.EmitClassBuilder$$anon$1.apply(EmitClassBuilder.scala:662)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$2(BackendUtils.scala:31)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:144)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$1(BackendUtils.scala:30)
at is.hail.backend.spark.SparkBackendComputeRDD.compute(SparkBackend.scala:723)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2254)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2203)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2202)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2202)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1078)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1078)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1078)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2441)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2383)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2372)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:868)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2202)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2223)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2242)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2267)
at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
at is.hail.backend.spark.SparkBackend.parallelizeAndComputeWithIndex(SparkBackend.scala:286)
at is.hail.backend.BackendUtils.collectDArray(BackendUtils.scala:28)
at __C21299Compiled.__m21447split_Let_region136_162(Emit.scala)
at __C21299Compiled.__m21447split_Let_region1_166(Emit.scala)
at __C21299Compiled.__m21447split_Let(Emit.scala)
at __C21299Compiled.apply(Emit.scala)
at is.hail.expr.ir.CompileAndEvaluate$.$anonfun$_apply$3(CompileAndEvaluate.scala:56)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:56)
at is.hail.expr.ir.CompileAndEvaluate$.evalToIR(CompileAndEvaluate.scala:29)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.evaluate$1(LowerOrInterpretNonCompilable.scala:29)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.rewrite$1(LowerOrInterpretNonCompilable.scala:66)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.apply(LowerOrInterpretNonCompilable.scala:71)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.transform(LoweringPass.scala:68)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$3(LoweringPass.scala:15)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$1(LoweringPass.scala:15)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.apply(LoweringPass.scala:13)
at is.hail.expr.ir.lowering.LoweringPass.apply$(LoweringPass.scala:12)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.apply(LoweringPass.scala:63)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1(LoweringPipeline.scala:14)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1$adapted(LoweringPipeline.scala:12)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:38)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:12)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:46)
at is.hail.backend.spark.SparkBackend._execute(SparkBackend.scala:381)
at is.hail.backend.spark.SparkBackend.$anonfun$execute$1(SparkBackend.scala:365)
at is.hail.expr.ir.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:47)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.expr.ir.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:47)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:46)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:275)
at is.hail.backend.spark.SparkBackend.execute(SparkBackend.scala:362)
at is.hail.backend.spark.SparkBackend.$anonfun$executeJSON$1(SparkBackend.scala:406)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.backend.spark.SparkBackend.executeJSON(SparkBackend.scala:404)
at sun.reflect.GeneratedMethodAccessor128.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
java.lang.ClassFormatError: Too many arguments in method signature in class file __C21837collect_distributed_array
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:756)
at java.lang.ClassLoader.defineClass(ClassLoader.java:635)
at is.hail.asm4s.package$HailClassLoader$.liftedTree1$1(package.scala:253)
at is.hail.asm4s.package$HailClassLoader$.loadOrDefineClass(package.scala:249)
at is.hail.asm4s.ClassesBytes.$anonfun$load$1(ClassBuilder.scala:65)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
at is.hail.asm4s.ClassesBytes.load(ClassBuilder.scala:63)
at is.hail.expr.ir.EmitClassBuilder$$anon$1.apply(EmitClassBuilder.scala:669)
at is.hail.expr.ir.EmitClassBuilder$$anon$1.apply(EmitClassBuilder.scala:662)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$2(BackendUtils.scala:31)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:144)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$1(BackendUtils.scala:30)
at is.hail.backend.spark.SparkBackendComputeRDD.compute(SparkBackend.scala:723)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Hail version: 0.2.76-d470e2fea3f0
Error summary: ClassFormatError: Too many arguments in method signature in class file __C21837collect_distributed_array
Could you try without the set_flags call? We fixed the issue that the new execution path was hitting. I’m not saying there won’t be another issue, but the first one is solved!
Hi, I wonder if the problem was ever fixed. I am getting the same issue with Hail 0.2.95:
hail.utils.java.FatalError: ClassFormatError: Too many arguments in method signature in class file __C1091collect_distributed_array
Interestingly, an out-of-memory error happens instead of the one above if I run code that requires more memory (this never happens with much older Hail versions).