Hi,
I am trying to run PC-Relate followed by hl.maximal_independent_set on a curated set of SNPs from the UK Biobank dataset. This produced the error message below, which I think means the task ran out of memory. The job already has 480 GB assigned to it on the cluster, so I am not sure how to increase the memory allowance any further.
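For reference, in the script below I pass the driver memory through PYSPARK_SUBMIT_ARGS before calling hl.init. The only alternative I am aware of is hl.init's spark_conf argument, though I am not sure whether it behaves any differently in local mode. Here is a minimal sketch of what I mean (not what I actually ran):

import hail as hl

# Sketch only, not the script I ran: pass the driver memory via spark_conf
# instead of PYSPARK_SUBMIT_ARGS. I am assuming this is equivalent to
# --driver-memory when Spark runs in local mode.
hl.init(default_reference='GRCh38', master='local[48]',
        spark_conf={'spark.driver.memory': '480g'})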
Script:
import hail as hl
import os
from gnomad.utils.liftover import *
from gnomad.utils.annotations import *
from gnomad.sample_qc.pipeline import *
# Define memory and CPU availability
tmp = "/mnt/grid/janowitz/home/skleeman/tmp"
os.environ["SPARK_LOCAL_DIRS"] = tmp
os.environ["PYSPARK_SUBMIT_ARGS"] = "--master local[48] --driver-memory 480g pyspark-shell"

hl.init(default_reference='GRCh38', master='local[48]', local='local[48]',
        min_block_size=128, local_tmpdir=tmp, tmp_dir=tmp)
ukb = hl.read_matrix_table('/mnt/grid/ukbiobank/data/Application58510/skleeman/ukb_grch38_pruned_intersect.mt')
# Find related individuals in the UKB set
relatedness_ht_ukb = hl.pc_relate(ukb.GT, min_individual_maf=0.05, k=10,
                                  min_kinship=0.1, statistics='kin')
related_samples_to_remove_ukb = hl.maximal_independent_set(relatedness_ht_ukb.i, relatedness_ht_ukb.j, False)
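One workaround I have been considering (untested) is to checkpoint the pc_relate output to disk and read it back before building the maximal independent set, so that the PC-Relate pipeline is not recomputed inside that call. A rough sketch of what I mean, with a placeholder checkpoint path:

# Sketch of a possible workaround (untested); the checkpoint path is a placeholder
relatedness_ht_ukb = relatedness_ht_ukb.checkpoint(
    '/mnt/grid/janowitz/home/skleeman/tmp/relatedness_ukb.ht', overwrite=True)
related_samples_to_remove_ukb = hl.maximal_independent_set(
    relatedness_ht_ukb.i, relatedness_ht_ukb.j, keep=False)

I am not sure whether this would actually help, though, since the failure seems to happen while PC-Relate itself is writing its intermediate block matrices (see the pcrelate-write-read lines in the log below).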
Error:
SparkUI available at http://bam22.cm.cluster:4040
Welcome to
     __  __     <>__
    / /_/ /__  __/ /
   / __  / _ `/ / /
  /_/ /_/\_,_/_/_/   version 0.2.61-0a87b16930d6
LOGGING: writing to /mnt/grid/janowitz/home/skleeman/ukbiobank/cancergwas/hail-20201217-1859-0.2.61-0a87b16930d6.log
2020-12-17 19:10:39 Hail: INFO: hwe_normalized_pca: running PCA using 34947 variants.
2020-12-17 19:11:10 Hail: INFO: pca: running PCA with 10 components...
2020-12-17 19:25:20 Hail: INFO: Wrote all 1080 blocks of 34947 x 488377 matrix with block size 4096.
2020-12-17 19:27:19 Hail: INFO: wrote matrix with 11 rows and 34947 columns as 9 blocks of size 4096 to /mnt/grid/janowitz/home/skleeman/tmp/pcrelate-write-read-uTx9cpc32PgSLVDfX3G4gp.bm
2020-12-17 19:28:48 Hail: INFO: wrote matrix with 34947 rows and 488377 columns as 1080 blocks of size 4096 to /mnt/grid/janowitz/home/skleeman/tmp/pcrelate-write-read-c4fzfdnjzcdkcGwBEqDGPn.bm
Traceback (most recent call last):
File "pca_ref.py", line 34, in <module>
related_samples_to_remove_ukb = hl.maximal_independent_set(relatedness_ht_ukb.i, relatedness_ht_ukb.j, False)
File "<decorator-gen-1377>", line 2, in maximal_independent_set
File "/grid/wsbs/home_norepl/skleeman/hail/hail/python/hail/typecheck/check.py", line 614, in wrapper
return __original_func(*args_, **kwargs_)
File "/grid/wsbs/home_norepl/skleeman/hail/hail/python/hail/methods/misc.py", line 151, in maximal_independent_set
edges.write(edges_path)
File "<decorator-gen-1095>", line 2, in write
File "/grid/wsbs/home_norepl/skleeman/hail/hail/python/hail/typecheck/check.py", line 614, in wrapper
return __original_func(*args_, **kwargs_)
File "/grid/wsbs/home_norepl/skleeman/hail/hail/python/hail/table.py", line 1271, in write
Env.backend().execute(ir.TableWrite(self._tir, ir.TableNativeWriter(output, overwrite, stage_locally, _codec_spec)))
File "/grid/wsbs/home_norepl/skleeman/hail/hail/python/hail/backend/py4j_backend.py", line 98, in execute
raise e
File "/grid/wsbs/home_norepl/skleeman/hail/hail/python/hail/backend/py4j_backend.py", line 74, in execute
result = json.loads(self._jhc.backend().executeJSON(jir))
File "/grid/wsbs/home_norepl/skleeman/.local/lib/python3.7/site-packages/py4j/java_gateway.py", line 1257, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/grid/wsbs/home_norepl/skleeman/hail/hail/python/hail/backend/py4j_backend.py", line 32, in deco
'Error summary: %s' % (deepest, full, hail.__version__, deepest), error_id) from None
hail.utils.java.FatalError: SparkException: Job 203 cancelled because SparkContext was shut down
Java stack trace:
org.apache.spark.SparkException: Job 203 cancelled because SparkContext was shut down
at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:933)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:931)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:78)
at org.apache.spark.scheduler.DAGScheduler.cleanUpAfterSchedulerStop(DAGScheduler.scala:931)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onStop(DAGScheduler.scala:2130)
at org.apache.spark.util.EventLoop.stop(EventLoop.scala:84)
at org.apache.spark.scheduler.DAGScheduler.stop(DAGScheduler.scala:2043)
at org.apache.spark.SparkContext$$anonfun$stop$6.apply$mcV$sp(SparkContext.scala:1949)
at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1340)
at org.apache.spark.SparkContext.stop(SparkContext.scala:1948)
at org.apache.spark.SparkContext$$anonfun$2.apply$mcV$sp(SparkContext.scala:575)
at org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:216)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1945)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply$mcV$sp(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178)
at org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:54)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:738)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:990)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
at org.apache.spark.rdd.RDD.collect(RDD.scala:989)
at is.hail.sparkextras.ContextRDD.collect(ContextRDD.scala:166)
at is.hail.utils.richUtils.RichContextRDD.writePartitions(RichContextRDD.scala:109)
at is.hail.utils.richUtils.RichRDD$.writePartitions$extension(RichRDD.scala:204)
at is.hail.linalg.BlockMatrix.write(BlockMatrix.scala:872)
at is.hail.methods.PCRelate.writeRead(PCRelate.scala:159)
at is.hail.methods.PCRelate.gram(PCRelate.scala:165)
at is.hail.methods.PCRelate.phi(PCRelate.scala:227)
at is.hail.methods.PCRelate.computeResult(PCRelate.scala:184)
at is.hail.methods.PCRelate.execute(PCRelate.scala:146)
at is.hail.expr.ir.BlockMatrixToTableApply.execute(TableIR.scala:2784)
at is.hail.expr.ir.TableMapRows.execute(TableIR.scala:1845)
at is.hail.expr.ir.TableKeyBy.execute(TableIR.scala:1209)
at is.hail.expr.ir.TableMapRows.execute(TableIR.scala:1845)
at is.hail.expr.ir.Interpret$.run(Interpret.scala:825)
at is.hail.expr.ir.Interpret$.alreadyLowered(Interpret.scala:53)
at is.hail.expr.ir.InterpretNonCompilable$.interpretAndCoerce$1(InterpretNonCompilable.scala:16)
at is.hail.expr.ir.InterpretNonCompilable$.is$hail$expr$ir$InterpretNonCompilable$$rewrite$1(InterpretNonCompilable.scala:53)
at is.hail.expr.ir.InterpretNonCompilable$.apply(InterpretNonCompilable.scala:58)
at is.hail.expr.ir.lowering.InterpretNonCompilablePass$.transform(LoweringPass.scala:67)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:15)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:15)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:15)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:13)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass$class.apply(LoweringPass.scala:13)
at is.hail.expr.ir.lowering.InterpretNonCompilablePass$.apply(LoweringPass.scala:62)
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:14)
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:12)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:12)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:28)
at is.hail.backend.spark.SparkBackend.is$hail$backend$spark$SparkBackend$$_execute(SparkBackend.scala:354)
at is.hail.backend.spark.SparkBackend$$anonfun$execute$1.apply(SparkBackend.scala:338)
at is.hail.backend.spark.SparkBackend$$anonfun$execute$1.apply(SparkBackend.scala:335)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:25)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:23)
at is.hail.utils.package$.using(package.scala:618)
at is.hail.annotations.Region$.scoped(Region.scala:18)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:23)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:247)
at is.hail.backend.spark.SparkBackend.execute(SparkBackend.scala:335)
at is.hail.backend.spark.SparkBackend$$anonfun$7.apply(SparkBackend.scala:379)
at is.hail.backend.spark.SparkBackend$$anonfun$7.apply(SparkBackend.scala:377)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.backend.spark.SparkBackend.executeJSON(SparkBackend.scala:377)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
I was wondering if you had any suggestions regarding this.
With best wishes,
Sam Kleeman