Java heap space out-of-memory error

I was running these lines:

gwas_par = hl.linear_regression_rows(y=mt_par.pheno.Hour_of_death,
                                     x=mt_par.GT.n_alt_alleles(),
                                     covariates=[1.0])
gwas_par.row.describe()

I have tried to allocate more memory, for example by using:

hl.init(min_block_size=128)

or

docker1 run -dit -p 8017:8017 biohpc_${USER}/hail-0.2.53 bash -c "export PYSPARK_SUBMIT_ARGS='--driver-memory 16g pyspark-shell'; jupyter notebook --NotebookApp.max_buffer_size=4000000000 --ip=0.0.0.0 --port=8017 --no-browser --allow-root >& /workdir/jupyter.log "

But it still reports the same error.
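
As far as I understand, min_block_size only sets Hail's minimum partition size in megabytes and does not change the JVM heap. A hedged sketch of an orthogonal knob I could also try, the block_size argument of linear_regression_rows, which trades speed for lower per-task memory (the smaller value below is my own guess, not something the docs prescribe for this error):

gwas_par = hl.linear_regression_rows(y=mt_par.pheno.Hour_of_death,
                                     x=mt_par.GT.n_alt_alleles(),
                                     covariates=[1.0],
                                     # assumption: smaller than the default of 16, so fewer
                                     # row regressions are held in memory per core at once
                                     block_size=8)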

By the way, is it possible for us to have a Zoom meeting so that you can control my screen? I think that would solve the problem more efficiently.

---------------------------------------------------------------------------
FatalError                                Traceback (most recent call last)
<ipython-input-12-2918ae4e3258> in <module>
      1 gwas_par = hl.linear_regression_rows(y=mt_par.pheno.Hour_of_death,
      2                                  x=mt_par.GT.n_alt_alleles(),
----> 3                                  covariates=[1.0])
      4 gwas_par.row.describe()

<decorator-gen-1545> in linear_regression_rows(y, x, covariates, block_size, pass_through)

/usr/local/lib/python3.6/dist-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
    612     def wrapper(__original_func, *args, **kwargs):
    613         args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614         return __original_func(*args_, **kwargs_)
    615 
    616     return wrapper

/usr/local/lib/python3.6/dist-packages/hail/methods/statgen.py in linear_regression_rows(y, x, covariates, block_size, pass_through)
    453         ht_result = ht_result.annotate(**{f: ht_result[f][0] for f in fields})
    454 
--> 455     return ht_result.persist()
    456 
    457 

<decorator-gen-1101> in persist(self, storage_level)

/usr/local/lib/python3.6/dist-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
    612     def wrapper(__original_func, *args, **kwargs):
    613         args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614         return __original_func(*args_, **kwargs_)
    615 
    616     return wrapper

/usr/local/lib/python3.6/dist-packages/hail/table.py in persist(self, storage_level)
   1834             Persisted table.
   1835         """
-> 1836         return Env.backend().persist_table(self, storage_level)
   1837 
   1838     def unpersist(self) -> 'Table':

/usr/local/lib/python3.6/dist-packages/hail/backend/spark_backend.py in persist_table(self, t, storage_level)
    313 
    314     def persist_table(self, t, storage_level):
--> 315         return Table._from_java(self._jbackend.pyPersistTable(storage_level, self._to_java_table_ir(t._tir)))
    316 
    317     def unpersist_table(self, t):

/usr/local/lib/python3.6/dist-packages/py4j/java_gateway.py in __call__(self, *args)
   1255         answer = self.gateway_client.send_command(command)
   1256         return_value = get_return_value(
-> 1257             answer, self.gateway_client, self.target_id, self.name)
   1258 
   1259         for temp_arg in temp_args:

/usr/local/lib/python3.6/dist-packages/hail/backend/spark_backend.py in deco(*args, **kwargs)
     39             raise FatalError('%s\n\nJava stack trace:\n%s\n'
     40                              'Hail version: %s\n'
---> 41                              'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
     42         except pyspark.sql.utils.CapturedException as e:
     43             raise FatalError('%s\n\nJava stack trace:\n%s\n'

FatalError: OutOfMemoryError: GC overhead limit exceeded

Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 12 in stage 5.0 failed 1 times, most recent failure: Lost task 12.0 in stage 5.0 (TID 65, localhost, executor driver): java.lang.OutOfMemoryError: GC overhead limit exceeded

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
	at scala.Option.foreach(Option.scala:257)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2158)
	at is.hail.rvd.RVD.combine(RVD.scala:724)
	at is.hail.expr.ir.Interpret$.run(Interpret.scala:935)
	at is.hail.expr.ir.Interpret$.alreadyLowered(Interpret.scala:53)
	at is.hail.expr.ir.InterpretNonCompilable$.interpretAndCoerce$1(InterpretNonCompilable.scala:16)
	at is.hail.expr.ir.InterpretNonCompilable$.is$hail$expr$ir$InterpretNonCompilable$$rewrite$1(InterpretNonCompilable.scala:53)
	at is.hail.expr.ir.InterpretNonCompilable$.is$hail$expr$ir$InterpretNonCompilable$$rewrite$1(InterpretNonCompilable.scala:41)
	at is.hail.expr.ir.InterpretNonCompilable$.apply(InterpretNonCompilable.scala:58)
	at is.hail.expr.ir.lowering.LegacyInterpretNonCompilablePass$.transform(LoweringPass.scala:58)
	at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:15)
	at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:15)
	at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:69)
	at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:15)
	at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:13)
	at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:69)
	at is.hail.expr.ir.lowering.LoweringPass$class.apply(LoweringPass.scala:13)
	at is.hail.expr.ir.lowering.LegacyInterpretNonCompilablePass$.apply(LoweringPass.scala:53)
	at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:14)
	at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:12)
	at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
	at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
	at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:12)
	at is.hail.expr.ir.Interpret$.apply(Interpret.scala:22)
	at is.hail.backend.spark.SparkBackend$$anonfun$pyPersistTable$1.apply(SparkBackend.scala:403)
	at is.hail.backend.spark.SparkBackend$$anonfun$pyPersistTable$1.apply(SparkBackend.scala:402)
	at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:20)
	at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:18)
	at is.hail.utils.package$.using(package.scala:609)
	at is.hail.annotations.Region$.scoped(Region.scala:18)
	at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:18)
	at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:230)
	at is.hail.backend.spark.SparkBackend.pyPersistTable(SparkBackend.scala:402)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)

java.lang.OutOfMemoryError: GC overhead limit exceeded
	at 




Hail version: 0.2.53-96ec0aef3db7
Error summary: OutOfMemoryError: GC overhead limit exceeded

So I asked the staff of the cloud-computing cluster that I am using, and they replied with this:

I won’t be able to help you on this. Only the software developer knows how to pass the memory setting to Spark

You have to set --executor-memory 16g as well. --driver-memory only sets the driver’s memory. You probably should also set init(master='local[NUMBER_OF_CORES]', ...)
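
A hedged sketch of how those pieces could fit together from inside the notebook, assuming the kernel has not started a JVM yet (the core count of 8 is only a placeholder for whatever your container actually has):

import os

# Both memory flags must be in the environment before hl.init() launches the JVM;
# setting them afterwards has no effect on an already-running driver.
os.environ['PYSPARK_SUBMIT_ARGS'] = '--driver-memory 16g --executor-memory 16g pyspark-shell'

import hail as hl
hl.init(min_block_size=128, master='local[8]')  # placeholder core count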

So could you please list all the things that I need to set? How many cores should I set?

docker1 run -dit -p 8017:8017 biohpc_${USER}/hail-0.2.53 bash -c "export PYSPARK_SUBMIT_ARGS='--driver-memory 16g pyspark-shell'; jupyter notebook --NotebookApp.max_buffer_size=4000000000 --ip=0.0.0.0 --port=8017 --no-browser --allow-root >& /workdir/jupyter.log "
docker1 run -dit -p 8017:8017 biohpc_${USER}/hail-0.2.53 bash -c "export PYSPARK_SUBMIT_ARGS='--executor-memory 16g pyspark-shell'; jupyter notebook --NotebookApp.max_buffer_size=4000000000 --ip=0.0.0.0 --port=8017 --no-browser --allow-root >& /workdir/jupyter.log "
hl.init(min_block_size=128, master='local[32]')

I have set things up as above. Now I get a new error:

FatalError                                Traceback (most recent call last)
<ipython-input-11-2918ae4e3258> in <module>
      1 gwas_par = hl.linear_regression_rows(y=mt_par.pheno.Hour_of_death,
      2                                  x=mt_par.GT.n_alt_alleles(),
----> 3                                  covariates=[1.0])
      4 gwas_par.row.describe()

<decorator-gen-1545> in linear_regression_rows(y, x, covariates, block_size, pass_through)

/usr/local/lib/python3.6/dist-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
    612     def wrapper(__original_func, *args, **kwargs):
    613         args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614         return __original_func(*args_, **kwargs_)
    615 
    616     return wrapper

/usr/local/lib/python3.6/dist-packages/hail/methods/statgen.py in linear_regression_rows(y, x, covariates, block_size, pass_through)
    453         ht_result = ht_result.annotate(**{f: ht_result[f][0] for f in fields})
    454 
--> 455     return ht_result.persist()
    456 
    457 

<decorator-gen-1101> in persist(self, storage_level)

/usr/local/lib/python3.6/dist-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
    612     def wrapper(__original_func, *args, **kwargs):
    613         args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614         return __original_func(*args_, **kwargs_)
    615 
    616     return wrapper

/usr/local/lib/python3.6/dist-packages/hail/table.py in persist(self, storage_level)
   1834             Persisted table.
   1835         """
-> 1836         return Env.backend().persist_table(self, storage_level)
   1837 
   1838     def unpersist(self) -> 'Table':

/usr/local/lib/python3.6/dist-packages/hail/backend/spark_backend.py in persist_table(self, t, storage_level)
    313 
    314     def persist_table(self, t, storage_level):
--> 315         return Table._from_java(self._jbackend.pyPersistTable(storage_level, self._to_java_table_ir(t._tir)))
    316 
    317     def unpersist_table(self, t):

/usr/local/lib/python3.6/dist-packages/py4j/java_gateway.py in __call__(self, *args)
   1255         answer = self.gateway_client.send_command(command)
   1256         return_value = get_return_value(
-> 1257             answer, self.gateway_client, self.target_id, self.name)
   1258 
   1259         for temp_arg in temp_args:

/usr/local/lib/python3.6/dist-packages/hail/backend/spark_backend.py in deco(*args, **kwargs)
     39             raise FatalError('%s\n\nJava stack trace:\n%s\n'
     40                              'Hail version: %s\n'
---> 41                              'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
     42         except pyspark.sql.utils.CapturedException as e:
     43             raise FatalError('%s\n\nJava stack trace:\n%s\n'

FatalError: SparkException: Job 4 cancelled because SparkContext was shut down

Java stack trace:
org.apache.spark.SparkException: Job 4 cancelled because SparkContext was shut down
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:932)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:930)
	at scala.collection.mutable.HashSet.foreach(HashSet.scala:78)
	at org.apache.spark.scheduler.DAGScheduler.cleanUpAfterSchedulerStop(DAGScheduler.scala:930)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onStop(DAGScheduler.scala:2128)
	at org.apache.spark.util.EventLoop.stop(EventLoop.scala:84)
	at org.apache.spark.scheduler.DAGScheduler.stop(DAGScheduler.scala:2041)
	at org.apache.spark.SparkContext$$anonfun$stop$6.apply$mcV$sp(SparkContext.scala:1949)
	at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1340)
	at org.apache.spark.SparkContext.stop(SparkContext.scala:1948)
	at org.apache.spark.SparkContext$$anon$3.run(SparkContext.scala:1903)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2158)
	at is.hail.rvd.RVD.combine(RVD.scala:724)
	at is.hail.expr.ir.Interpret$.run(Interpret.scala:935)
	at is.hail.expr.ir.Interpret$.alreadyLowered(Interpret.scala:53)
	at is.hail.expr.ir.InterpretNonCompilable$.interpretAndCoerce$1(InterpretNonCompilable.scala:16)
	at is.hail.expr.ir.InterpretNonCompilable$.is$hail$expr$ir$InterpretNonCompilable$$rewrite$1(InterpretNonCompilable.scala:53)
	at is.hail.expr.ir.InterpretNonCompilable$.is$hail$expr$ir$InterpretNonCompilable$$rewrite$1(InterpretNonCompilable.scala:41)
	at is.hail.expr.ir.InterpretNonCompilable$.apply(InterpretNonCompilable.scala:58)
	at is.hail.expr.ir.lowering.LegacyInterpretNonCompilablePass$.transform(LoweringPass.scala:58)
	at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:15)
	at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:15)
	at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:69)
	at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:15)
	at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:13)
	at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:69)
	at is.hail.expr.ir.lowering.LoweringPass$class.apply(LoweringPass.scala:13)
	at is.hail.expr.ir.lowering.LegacyInterpretNonCompilablePass$.apply(LoweringPass.scala:53)
	at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:14)
	at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:12)
	at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
	at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
	at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:12)
	at is.hail.expr.ir.Interpret$.apply(Interpret.scala:22)
	at is.hail.backend.spark.SparkBackend$$anonfun$pyPersistTable$1.apply(SparkBackend.scala:403)
	at is.hail.backend.spark.SparkBackend$$anonfun$pyPersistTable$1.apply(SparkBackend.scala:402)
	at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:20)
	at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:18)
	at is.hail.utils.package$.using(package.scala:609)
	at is.hail.annotations.Region$.scoped(Region.scala:18)
	at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:18)
	at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:230)
	at is.hail.backend.spark.SparkBackend.pyPersistTable(SparkBackend.scala:402)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)



Hail version: 0.2.53-96ec0aef3db7
Error summary: SparkException: Job 4 cancelled because SparkContext was shut down

You should set executor memory and driver memory in the same PYSPARK_SUBMIT_ARGS, like this:

export PYSPARK_SUBMIT_ARGS='--driver-memory 16g --executor-memory 16g pyspark-shell'
jupyter ...
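
If you want to confirm the settings actually reached the JVM, a quick sanity check from the notebook after initializing (a hedged sketch; the expected values assume the 16g flags above):

import hail as hl
hl.init()
sc = hl.spark_context()
print(sc.getConf().get('spark.driver.memory', 'not set'))    # expect '16g'
print(sc.getConf().get('spark.executor.memory', 'not set'))  # expect '16g'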

That error usually means the real error is in the log file. Can you attach the log file here? The log file location is printed when Hail first starts.
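
If the default location is hard to track down inside the container, a hedged option is to pin the log path yourself when initializing (the path below is only an example):

import hail as hl
hl.init(log='/workdir/hail.log')  # example path; any writable location in the container works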