Hi,
I am using Hail 0.2.95 and Spark 3.1.2. While trying to run the pipeline,
I get the following error when `mt.write` is executing:
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 23 in stage 4.0 failed 10 times, most recent failure: Lost task 23.9 in stage 4.0 (TID 2145) (ip-172-23-82-37.ec2.internal executor 2): java.lang.ClassFormatError: Too many arguments in method signature in class file __C1091collect_distributed_array
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:756)
at java.lang.ClassLoader.defineClass(ClassLoader.java:635)
at is.hail.asm4s.HailClassLoader.liftedTree1$1(HailClassLoader.scala:10)
at is.hail.asm4s.HailClassLoader.loadOrDefineClass(HailClassLoader.scala:6)
at is.hail.asm4s.ClassesBytes.$anonfun$load$1(ClassBuilder.scala:64)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
at is.hail.asm4s.ClassesBytes.load(ClassBuilder.scala:62)
at is.hail.expr.ir.EmitClassBuilder$$anon$1.apply(EmitClassBuilder.scala:717)
at is.hail.expr.ir.EmitClassBuilder$$anon$1.apply(EmitClassBuilder.scala:710)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$4(BackendUtils.scala:40)
at is.hail.utils.package$.using(package.scala:640)
at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:162)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$3(BackendUtils.scala:39)
at is.hail.backend.spark.SparkBackendComputeRDD.compute(SparkBackend.scala:761)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2470)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2419)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2418)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2418)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1125)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1125)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1125)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2684)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2626)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2615)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:914)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2241)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2262)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2281)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2306)
at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
at is.hail.backend.spark.SparkBackend.parallelizeAndComputeWithIndex(SparkBackend.scala:321)
at is.hail.backend.BackendUtils.collectDArray(BackendUtils.scala:37)
at __C827Compiled.__m859split_CollectDistributedArray_region81_96(Emit.scala)
at __C827Compiled.__m859split_CollectDistributedArray_region19_102(Emit.scala)
at __C827Compiled.__m859split_CollectDistributedArray(Emit.scala)
at __C827Compiled.apply(Emit.scala)
at is.hail.expr.ir.CompileAndEvaluate$.$anonfun$_apply$3(CompileAndEvaluate.scala:57)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:57)
at is.hail.expr.ir.CompileAndEvaluate$.evalToIR(CompileAndEvaluate.scala:30)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.evaluate$1(LowerOrInterpretNonCompilable.scala:30)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.rewrite$1(LowerOrInterpretNonCompilable.scala:67)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.apply(LowerOrInterpretNonCompilable.scala:72)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.transform(LoweringPass.scala:69)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$3(LoweringPass.scala:16)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$1(LoweringPass.scala:16)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.apply(LoweringPass.scala:14)
at is.hail.expr.ir.lowering.LoweringPass.apply$(LoweringPass.scala:13)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.apply(LoweringPass.scala:64)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1(LoweringPipeline.scala:15)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1$adapted(LoweringPipeline.scala:13)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:38)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:13)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:47)
at is.hail.backend.spark.SparkBackend._execute(SparkBackend.scala:416)
at is.hail.backend.spark.SparkBackend.$anonfun$executeEncode$2(SparkBackend.scala:452)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:69)
at is.hail.utils.package$.using(package.scala:640)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:69)
at is.hail.utils.package$.using(package.scala:640)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:58)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:310)
at is.hail.backend.spark.SparkBackend.$anonfun$executeEncode$1(SparkBackend.scala:449)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.backend.spark.SparkBackend.executeEncode(SparkBackend.scala:448)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:750)
java.lang.ClassFormatError: Too many arguments in method signature in class file __C1091collect_distributed_array
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:756)
at java.lang.ClassLoader.defineClass(ClassLoader.java:635)
at is.hail.asm4s.HailClassLoader.liftedTree1$1(HailClassLoader.scala:10)
at is.hail.asm4s.HailClassLoader.loadOrDefineClass(HailClassLoader.scala:6)
at is.hail.asm4s.ClassesBytes.$anonfun$load$1(ClassBuilder.scala:64)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
at is.hail.asm4s.ClassesBytes.load(ClassBuilder.scala:62)
at is.hail.expr.ir.EmitClassBuilder$$anon$1.apply(EmitClassBuilder.scala:717)
at is.hail.expr.ir.EmitClassBuilder$$anon$1.apply(EmitClassBuilder.scala:710)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$4(BackendUtils.scala:40)
at is.hail.utils.package$.using(package.scala:640)
at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:162)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$3(BackendUtils.scala:39)
at is.hail.backend.spark.SparkBackendComputeRDD.compute(SparkBackend.scala:761)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Hail version: 0.2.95-513139587f57
Error summary: ClassFormatError: Too many arguments in method signature in class file __C1091collect_distributed_array
ERROR:luigi-interface:[pid 9111] Worker Worker(salt=082046728, workers=1, host=ip-172-23-84-150, username=hadoop, pid=9111) failed SeqrVCFToMTTask(source_paths=["s3://seqr-dp-data--prod/vcf/dev/grch38_subset.vcf"], dest_path=s3://seqr-dp-build--prod/mt-hail-luigi/emr-test/grch38_subset_SUCCESS_TO_ES, genome_version=38, array_elements_required=False, vep_runner=VEP, reference_ht_path=s3://seqr-dp-data--prod/seqr-reference-data/GRCh38/all_reference_data/combined_reference_data_grch38.ht, clinvar_ht_path=s3://seqr-dp-data--prod/seqr-reference-data/GRCh38/CLINVAR/clinvar.GRCh38.ht, hgmd_like_csv_path=s3://seqr-dp-data--prod/seqr-reference-data/GRCh38/HGMD_LIKE/GRCh38_HGMD_LIKE_2021_04_Seqr2.csv, hgmd_ht_path=s3://seqr-dp-data--prod/seqr-reference-data/GRCh38/HGMD/hgmd_2021.4_hg38.ht, cidr_ht_path=s3://seqr-dp-data--prod/seqr-reference-data/GRCh38/CIDR.ht, nisc_ht_path=s3://seqr-dp-data--prod/seqr-reference-data/GRCh38/NISC.ht, bgi_ht_path=s3://seqr-dp-data--prod/seqr-reference-data/GRCh38/BGI.ht, hgsc_wes_ht_path=s3://seqr-dp-data--prod/seqr-reference-data/GRCh38/HGSC_WES.ht, hgsc_wgs_ht_path=s3://seqr-dp-data--prod/seqr-reference-data/GRCh38/HGSC_WGS.ht, sample_type=WGS, dont_validate=True, dataset_type=VARIANTS, remap_path=, subset_path=, vep_config_json_path=, grch38_to_grch37_ref_chain=s3://seqr-dp-data--prod/seqr-reference-data/grch38_to_grch37.over.chain.gz)
Traceback (most recent call last):
File "/home/hadoop/py3_venv/lib64/python3.7/site-packages/luigi/worker.py", line 199, in run
new_deps = self._run_get_new_deps()
File "/home/hadoop/py3_venv/lib64/python3.7/site-packages/luigi/worker.py", line 141, in _run_get_new_deps
task_gen = self.task.run()
File "/home/hadoop/hail-elasticsearch-pipelines/luigi_pipeline/seqr_loading.py", line 108, in run
self.read_vcf_write_mt()
File "/home/hadoop/hail-elasticsearch-pipelines/luigi_pipeline/seqr_loading.py", line 158, in read_vcf_write_mt
mt.write(self.output().path, overwrite=True)
File "<decorator-gen-1154>", line 2, in write
File "/home/hadoop/py3_venv/lib64/python3.7/site-packages/hail/typecheck/check.py", line 577, in wrapper
return __original_func(*args_, **kwargs_)
File "/home/hadoop/py3_venv/lib64/python3.7/site-packages/hail/matrixtable.py", line 2556, in write
Env.backend().execute(ir.MatrixWrite(self._mir, writer))
File "/home/hadoop/py3_venv/lib64/python3.7/site-packages/hail/backend/py4j_backend.py", line 104, in execute
self._handle_fatal_error_from_backend(e, ir)
File "/home/hadoop/py3_venv/lib64/python3.7/site-packages/hail/backend/backend.py", line 181, in _handle_fatal_error_from_backend
raise err
File "/home/hadoop/py3_venv/lib64/python3.7/site-packages/hail/backend/py4j_backend.py", line 98, in execute
result_tuple = self._jbackend.executeEncode(jir, stream_codec)
File "/usr/lib/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/home/hadoop/py3_venv/lib64/python3.7/site-packages/hail/backend/py4j_backend.py", line 31, in deco
raise fatal_error_from_java_error_triplet(deepest, full, error_id) from None
hail.utils.java.FatalError: ClassFormatError: Too many arguments in method signature in class file __C1091collect_distributed_array
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 23 in stage 4.0 failed 10 times, most recent failure: Lost task 23.9 in stage 4.0 (TID 2145) (ip-172-23-82-37.ec2.internal executor 2): java.lang.ClassFormatError: Too many arguments in method signature in class file __C1091collect_distributed_array
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:756)
at java.lang.ClassLoader.defineClass(ClassLoader.java:635)
at is.hail.asm4s.HailClassLoader.liftedTree1$1(HailClassLoader.scala:10)
at is.hail.asm4s.HailClassLoader.loadOrDefineClass(HailClassLoader.scala:6)
at is.hail.asm4s.ClassesBytes.$anonfun$load$1(ClassBuilder.scala:64)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
at is.hail.asm4s.ClassesBytes.load(ClassBuilder.scala:62)
at is.hail.expr.ir.EmitClassBuilder$$anon$1.apply(EmitClassBuilder.scala:717)
at is.hail.expr.ir.EmitClassBuilder$$anon$1.apply(EmitClassBuilder.scala:710)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$4(BackendUtils.scala:40)
at is.hail.utils.package$.using(package.scala:640)
at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:162)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$3(BackendUtils.scala:39)
at is.hail.backend.spark.SparkBackendComputeRDD.compute(SparkBackend.scala:761)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2470)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2419)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2418)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2418)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1125)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1125)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1125)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2684)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2626)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2615)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:914)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2241)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2262)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2281)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2306)
at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
at is.hail.backend.spark.SparkBackend.parallelizeAndComputeWithIndex(SparkBackend.scala:321)
at is.hail.backend.BackendUtils.collectDArray(BackendUtils.scala:37)
at __C827Compiled.__m859split_CollectDistributedArray_region81_96(Emit.scala)
at __C827Compiled.__m859split_CollectDistributedArray_region19_102(Emit.scala)
at __C827Compiled.__m859split_CollectDistributedArray(Emit.scala)
at __C827Compiled.apply(Emit.scala)
at is.hail.expr.ir.CompileAndEvaluate$.$anonfun$_apply$3(CompileAndEvaluate.scala:57)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:57)
at is.hail.expr.ir.CompileAndEvaluate$.evalToIR(CompileAndEvaluate.scala:30)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.evaluate$1(LowerOrInterpretNonCompilable.scala:30)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.rewrite$1(LowerOrInterpretNonCompilable.scala:67)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.apply(LowerOrInterpretNonCompilable.scala:72)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.transform(LoweringPass.scala:69)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$3(LoweringPass.scala:16)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$1(LoweringPass.scala:16)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.apply(LoweringPass.scala:14)
at is.hail.expr.ir.lowering.LoweringPass.apply$(LoweringPass.scala:13)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.apply(LoweringPass.scala:64)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1(LoweringPipeline.scala:15)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1$adapted(LoweringPipeline.scala:13)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:38)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:13)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:47)
at is.hail.backend.spark.SparkBackend._execute(SparkBackend.scala:416)
at is.hail.backend.spark.SparkBackend.$anonfun$executeEncode$2(SparkBackend.scala:452)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:69)
at is.hail.utils.package$.using(package.scala:640)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:69)
at is.hail.utils.package$.using(package.scala:640)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:58)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:310)
at is.hail.backend.spark.SparkBackend.$anonfun$executeEncode$1(SparkBackend.scala:449)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.backend.spark.SparkBackend.executeEncode(SparkBackend.scala:448)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:750)
java.lang.ClassFormatError: Too many arguments in method signature in class file __C1091collect_distributed_array
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:756)
at java.lang.ClassLoader.defineClass(ClassLoader.java:635)
at is.hail.asm4s.HailClassLoader.liftedTree1$1(HailClassLoader.scala:10)
at is.hail.asm4s.HailClassLoader.loadOrDefineClass(HailClassLoader.scala:6)
at is.hail.asm4s.ClassesBytes.$anonfun$load$1(ClassBuilder.scala:64)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
at is.hail.asm4s.ClassesBytes.load(ClassBuilder.scala:62)
at is.hail.expr.ir.EmitClassBuilder$$anon$1.apply(EmitClassBuilder.scala:717)
at is.hail.expr.ir.EmitClassBuilder$$anon$1.apply(EmitClassBuilder.scala:710)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$4(BackendUtils.scala:40)
at is.hail.utils.package$.using(package.scala:640)
at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:162)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$3(BackendUtils.scala:39)
at is.hail.backend.spark.SparkBackendComputeRDD.compute(SparkBackend.scala:761)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Hail version: 0.2.95-513139587f57
Error summary: ClassFormatError: Too many arguments in method signature in class file __C1091collect_distributed_array
I get this error only if I set all non-required reference datasets to None (in the above link that is just hgmd, but we have many more of them, which increases memory usage; with the old Hail and Spark versions, however, there were no errors). If I don’t do that, I instead get a Java out-of-memory mmap error which, I believe, could be triggered by (or related to) the error that I posted above. I found the following thread that may be related to the issue:
Unfortunately, there is no final fix or answer in that thread.
Update:
After I reverted to Spark 2.4.5 and Hail 0.2.57 it started to work, so the issue appears to have been introduced in the newer Hail or Spark versions.