Hey guys,
I have been running the "gnomAD gene models pipeline", which combines GTF files, HGNC data, and canonical transcript lists.
hailctl dataproc submit data-prep \
    --pyfiles ./data/data_utils \
    ./data/prepare_gene_models.py \
    --gencode 29 \
    /path/to/gencode.v29.gtf.bgz \
    $CANONICAL_TRANSCRIPTS_GRCH38_PATH \
    --gencode 19 \
    /path/to/gencode.v19.gtf.bgz \
    $CANONICAL_TRANSCRIPTS_GRCH37_PATH \
    --hgnc /path/to/hgnc.tsv \
    --mane-select-transcripts /path/to/mane_summary.tsv.gz \
    --output /path/to/genes.ht
My job cannot finish — I keep getting the error below, and I am not sure how to fix it:
"
Exception in thread “RemoteBlock-temp-file-clean-thread” java.lang.OutOfMemoryError: Java heap space
Exception in thread “Spark Context Cleaner” java.lang.OutOfMemoryError: Java heap space
Traceback (most recent call last):
File “/home/mamta/Test/gnomad-browser/data/prepare_gene_models.py”, line 348, in
main()
File “/home/mamta/Test/gnomad-browser/data/prepare_gene_models.py”, line 213, in main
gencode_genes = load_gencode_gene_models(gtf_path, min_partitions=args.min_partitions)
File “/home/mamta/Test/gnomad-browser/data/prepare_gene_models.py”, line 187, in load_gencode_gene_models
genes = genes.cache()
File “/home/mamta/.local/lib/python3.6/site-packages/hail/table.py”, line 1742, in cache
return self.persist(‘MEMORY_ONLY’)
File “”, line 2, in persist
File “/home/mamta/.local/lib/python3.6/site-packages/hail/typecheck/check.py”, line 585, in wrapper
return original_func(*args, **kwargs)
File “/home/mamta/.local/lib/python3.6/site-packages/hail/table.py”, line 1780, in persist
return Env.backend().persist_table(self, storage_level)
File “/home/mamta/.local/lib/python3.6/site-packages/hail/backend/backend.py”, line 227, in persist_table
return Table._from_java(self._to_java_ir(t._tir).pyPersist(storage_level))
File “/home/mamta/.local/lib/python3.6/site-packages/py4j/java_gateway.py”, line 1257, in call
answer, self.gateway_client, self.target_id, self.name)
File “/home/mamta/.local/lib/python3.6/site-packages/hail/utils/java.py”, line 211, in deco
‘Error summary: %s’ % (deepest, full, hail.version, deepest)) from None
hail.utils.java.FatalError: SparkException: Job aborted due to stage failure: Task 9 in stage 4.0 failed 1 times, most recent failure: Lost task 9.0 in stage 4.0 (TID 137, localhost, executor driver): ExecutorLostFailure (executor driver exited caused by one of the running tasks) Reason: Executor heartbeat timed out after 137617 ms
Driver stacktrace:
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 9 in stage 4.0 failed 1 times, most recent failure: Lost task 9.0 in stage 4.0 (TID 137, localhost, executor driver): ExecutorLostFailure (executor driver exited caused by one of the running tasks) Reason: Executor heartbeat timed out after 137617 ms
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2114)
at is.hail.sparkextras.ContextRDD.crunJobWithIndex(ContextRDD.scala:228)
at is.hail.rvd.RVD$.getKeyInfo(RVD.scala:1226)
at is.hail.rvd.RVD$.makeCoercer(RVD.scala:1301)
at is.hail.rvd.RVD$.coerce(RVD.scala:1256)
at is.hail.rvd.RVD$.coerce(RVD.scala:1240)
at is.hail.expr.ir.TableKeyByAndAggregate.execute(TableIR.scala:1733)
at is.hail.expr.ir.TableLeftJoinRightDistinct.execute(TableIR.scala:1062)
at is.hail.expr.ir.TableMapRows.execute(TableIR.scala:1088)
at is.hail.expr.ir.TableLeftJoinRightDistinct.execute(TableIR.scala:1061)
at is.hail.expr.ir.TableMapRows.execute(TableIR.scala:1088)
at is.hail.expr.ir.Interpret$.apply(Interpret.scala:23)
at is.hail.expr.ir.TableIR$$anonfun$persist$1.apply(TableIR.scala:53)
at is.hail.expr.ir.TableIR$$anonfun$persist$1.apply(TableIR.scala:52)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:15)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:13)
at is.hail.utils.package$.using(package.scala:604)
at is.hail.annotations.Region$.scoped(Region.scala:18)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:13)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:10)
at is.hail.expr.ir.TableIR.persist(TableIR.scala:52)
at is.hail.expr.ir.TableIR.pyPersist(TableIR.scala:72)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Hail version: 0.2.37-7952b436bd70