I am consistently running into a NullPointerException that I don't know how to debug.
The issue arises after the following annotation and filtering step:
ht = ht.explode(ht.microexon_interval_identifier)

# Split each identifier into the event ID and an optional region suffix.
event_split = ht.microexon_interval_identifier.first_match_in(
    r"(HsaEX\d{7}|putative_microexon_\d{9})(?:_(.*))?")
ht = ht.annotate(event=event_split[0], event_region=event_split[1])

ht = ht.annotate(
    is_flanking_exon=(ht.microexon_interval_identifier.contains('c1') |
                      ht.microexon_interval_identifier.contains('c2')) &
                     ht.microexon_interval_identifier.startswith('HsaEX'),
    is_microexon=~ht.microexon_interval_identifier.contains('c1') &
                 ~ht.microexon_interval_identifier.contains('c2') &
                 ht.microexon_interval_identifier.startswith('HsaEX'),
    is_novel_microexon=novel_microexons_set.contains(ht.event)
)
ht = ht.filter(ht.is_flanking_exon | ht.is_microexon | ht.is_novel_microexon)
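For what it's worth, the only values I can see that could be missing here come from first_match_in: event_region is missing whenever the optional (?:_(.*))? group doesn't participate in the match, and both event and event_region are missing when the regex doesn't match at all. This is a minimal sketch of how I would count missing values per annotated field to see whether that's related (field names as in the snippet above):

import hail as hl

# Count rows where each annotated field is missing; a missing-heavy field
# would point at the annotation step producing the problematic values.
missing_counts = ht.aggregate(hl.struct(
    n_missing_event=hl.agg.count_where(hl.is_missing(ht.event)),
    n_missing_event_region=hl.agg.count_where(hl.is_missing(ht.event_region)),
    n_missing_identifier=hl.agg.count_where(
        hl.is_missing(ht.microexon_interval_identifier)),
))
print(missing_counts)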
The traceback is:
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_bundle/pydev_umd.py", line 198, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_imps/_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "/Users/hannes/hail/process_all_dataset_jointly.py", line 129, in <module>
ht = ht.checkpoint("/Users/hannes/Desktop/mssng_cg_checkpoint.ht", overwrite=True)
File "<decorator-gen-1010>", line 2, in checkpoint
File "/Users/hannes/opt/anaconda3/envs/hail/lib/python3.7/site-packages/hail/typecheck/check.py", line 577, in wrapper
return __original_func(*args_, **kwargs_)
File "/Users/hannes/opt/anaconda3/envs/hail/lib/python3.7/site-packages/hail/table.py", line 1238, in checkpoint
self.write(output=output, overwrite=overwrite, stage_locally=stage_locally, _codec_spec=_codec_spec)
File "<decorator-gen-1012>", line 2, in write
File "/Users/hannes/opt/anaconda3/envs/hail/lib/python3.7/site-packages/hail/typecheck/check.py", line 577, in wrapper
return __original_func(*args_, **kwargs_)
File "/Users/hannes/opt/anaconda3/envs/hail/lib/python3.7/site-packages/hail/table.py", line 1271, in write
Env.backend().execute(ir.TableWrite(self._tir, ir.TableNativeWriter(output, overwrite, stage_locally, _codec_spec)))
File "/Users/hannes/opt/anaconda3/envs/hail/lib/python3.7/site-packages/hail/backend/py4j_backend.py", line 110, in execute
raise e
File "/Users/hannes/opt/anaconda3/envs/hail/lib/python3.7/site-packages/hail/backend/py4j_backend.py", line 86, in execute
result_tuple = self._jhc.backend().executeEncode(jir, stream_codec)
File "/Users/hannes/opt/anaconda3/envs/hail/lib/python3.7/site-packages/py4j/java_gateway.py", line 1305, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/Users/hannes/opt/anaconda3/envs/hail/lib/python3.7/site-packages/hail/backend/py4j_backend.py", line 31, in deco
'Error summary: %s' % (deepest, full, hail.__version__, deepest), error_id) from None
hail.utils.java.FatalError: NullPointerException: null
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 4.0 failed 1 times, most recent failure: Lost task 6.0 in stage 4.0 (TID 536) (192.168.4.98 executor driver): java.lang.NullPointerException
at __C1178collect_distributed_array.__m1336ENCODE_SJavaString$_TO_o_binary(Unknown Source)
at __C1178collect_distributed_array.__m1324ENCODE_SInsertFieldsStruct_TO_r_struct_of_o_binaryANDo_int32ANDo_binaryANDo_int32ANDo_int32ANDo_int32ANDo_binaryANDo_float64ANDo_int32ANDo_binaryANDo_boolANDo_binaryANDr_binaryANDr_binaryANDr_struct_of_r_struct_of_r_binaryANDr_int32ENDANDr_array_of_r_binaryENDANDo_binaryANDo_binaryANDr_boolANDr_boolANDo_boolEND(Unknown Source)
at __C1178collect_distributed_array.__m1190split_WritePartition(Unknown Source)
at __C1178collect_distributed_array.apply(Unknown Source)
at __C1178collect_distributed_array.apply(Unknown Source)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$2(BackendUtils.scala:31)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:144)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$1(BackendUtils.scala:30)
at is.hail.backend.spark.SparkBackendComputeRDD.compute(SparkBackend.scala:730)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2258)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2207)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2206)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2206)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1079)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1079)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1079)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2445)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2387)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2376)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:868)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2196)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2217)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2236)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2261)
at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
at is.hail.backend.spark.SparkBackend.parallelizeAndComputeWithIndex(SparkBackend.scala:286)
at is.hail.backend.BackendUtils.collectDArray(BackendUtils.scala:28)
at __C1151Compiled.apply(Emit.scala)
at is.hail.expr.ir.CompileAndEvaluate$.$anonfun$_apply$3(CompileAndEvaluate.scala:57)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:57)
at is.hail.expr.ir.CompileAndEvaluate$.evalToIR(CompileAndEvaluate.scala:30)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.evaluate$1(LowerOrInterpretNonCompilable.scala:30)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.rewrite$1(LowerOrInterpretNonCompilable.scala:67)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.apply(LowerOrInterpretNonCompilable.scala:72)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.transform(LoweringPass.scala:69)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$3(LoweringPass.scala:16)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$1(LoweringPass.scala:16)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.apply(LoweringPass.scala:14)
at is.hail.expr.ir.lowering.LoweringPass.apply$(LoweringPass.scala:13)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.apply(LoweringPass.scala:64)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1(LoweringPipeline.scala:15)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1$adapted(LoweringPipeline.scala:13)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:38)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:13)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:47)
at is.hail.backend.spark.SparkBackend._execute(SparkBackend.scala:381)
at is.hail.backend.spark.SparkBackend.$anonfun$executeEncode$2(SparkBackend.scala:417)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:47)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:47)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:46)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:275)
at is.hail.backend.spark.SparkBackend.$anonfun$executeEncode$1(SparkBackend.scala:414)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.backend.spark.SparkBackend.executeEncode(SparkBackend.scala:413)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:745)
java.lang.NullPointerException: null
at __C1178collect_distributed_array.__m1336ENCODE_SJavaString$_TO_o_binary(Unknown Source)
at __C1178collect_distributed_array.__m1324ENCODE_SInsertFieldsStruct_TO_r_struct_of_o_binaryANDo_int32ANDo_binaryANDo_int32ANDo_int32ANDo_int32ANDo_binaryANDo_float64ANDo_int32ANDo_binaryANDo_boolANDo_binaryANDr_binaryANDr_binaryANDr_struct_of_r_struct_of_r_binaryANDr_int32ENDANDr_array_of_r_binaryENDANDo_binaryANDo_binaryANDr_boolANDr_boolANDo_boolEND(Unknown Source)
at __C1178collect_distributed_array.__m1190split_WritePartition(Unknown Source)
at __C1178collect_distributed_array.apply(Unknown Source)
at __C1178collect_distributed_array.apply(Unknown Source)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$2(BackendUtils.scala:31)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:144)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$1(BackendUtils.scala:30)
at is.hail.backend.spark.SparkBackendComputeRDD.compute(SparkBackend.scala:730)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Hail version: 0.2.78-b17627756568
Error summary: NullPointerException: null
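One thing I considered is bisecting the pipeline by checkpointing after each transformation, to see which write first triggers the exception; a rough sketch, with placeholder /tmp/*.ht output paths:

# Write after each step; the step whose checkpoint first raises the
# NullPointerException is presumably the one producing the bad value.
ht1 = ht.explode(ht.microexon_interval_identifier)
ht1 = ht1.checkpoint('/tmp/step1_explode.ht', overwrite=True)

event_split = ht1.microexon_interval_identifier.first_match_in(
    r"(HsaEX\d{7}|putative_microexon_\d{9})(?:_(.*))?")
ht2 = ht1.annotate(event=event_split[0], event_region=event_split[1])
ht2 = ht2.checkpoint('/tmp/step2_annotate.ht', overwrite=True)

Is there a better way to trace an exception like this back to a specific field or row?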
I’d be very thankful for any help on how to debug this.