I realised that in my last response I accidentally omitted the cell where I filtered out NaN phenotypes, but I did do this, using:
# remove individuals with NaN BMI values
ph_filtered = ph.filter(hl.is_nan(ph.BMI), keep=False)
# report how many rows were removed
pprint(ph.count() - ph_filtered.count())
I then joined ph_filtered to my MatrixTable.
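A minimal sketch of what that join looked like (assuming the MatrixTable is called mt, its column key mt.s is the sample ID, and ph_filtered is keyed by the same ID; names are illustrative):
# annotate the MatrixTable columns with the filtered phenotype table, joined on sample ID
mt = mt.annotate_cols(pheno=ph_filtered[mt.s])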
I wondered whether NaN p-values might also be the issue with the plotting, so I tried the same technique of filtering for these. Using count, collect, or Table.any to find out how many p-values are NaN results in memory errors, so it's not clear whether there actually are any NaN p-values.
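The cheapest way I know to count them would be a single-pass aggregation rather than collect (a sketch, assuming gwas is the table of regression results with a p_value field):
# count NaN p-values in a single pass, without collecting the table locally
n_nan = gwas.aggregate(hl.agg.count_where(hl.is_nan(gwas.p_value)))
print(n_nan)
though I'd expect this to run into the same memory problems. I filtered them anyway and tried a Manhattan plot with a very low n_divisions, just to get something to plot, as follows: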
# remove NaN p values
gwas_filtered = gwas.filter(hl.is_nan(gwas.p_value), keep=False)
# manhattan plot
m = hl.plot.manhattan(gwas_filtered.p_value, n_divisions=10)
show(m)
And I get this error:
---------------------------------------------------------------------------
FatalError Traceback (most recent call last)
<ipython-input-25-8cf686129d78> in <module>
1 # manhattan plot
----> 2 m = hl.plot.manhattan(gwas_filtered.p_value, n_divisions=10)
3 show(m)
</home/ch283/chloe/lib/python3.7/site-packages/decorator.py:decorator-gen-1567> in manhattan(pvals, locus, title, size, hover_fields, collect_all, n_divisions, significance_line)
~/chloe/lib/python3.7/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
583 def wrapper(__original_func, *args, **kwargs):
584 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585 return __original_func(*args_, **kwargs_)
586
587 return wrapper
~/chloe/lib/python3.7/site-packages/hail/plot/plots.py in manhattan(pvals, locus, title, size, hover_fields, collect_all, n_divisions, significance_line)
1378 ('_pval', pvals),
1379 fields=hover_fields,
-> 1380 n_divisions=None if collect_all else n_divisions
1381 )
1382 source_pd['p_value'] = [10 ** (-p) for p in source_pd['_pval']]
~/chloe/lib/python3.7/site-packages/hail/plot/plots.py in _collect_scatter_plot_data(x, y, fields, n_divisions, missing_label)
715
716 agg_f = x[1]._aggregation_method()
--> 717 res = agg_f(hail.agg.downsample(x[1], y[1], label=list(expressions.values()) if expressions else None, n_divisions=n_divisions))
718 source_pd = pd.DataFrame([
719 dict(
</home/ch283/chloe/lib/python3.7/site-packages/decorator.py:decorator-gen-1047> in aggregate(self, expr, _localize)
~/chloe/lib/python3.7/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
583 def wrapper(__original_func, *args, **kwargs):
584 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585 return __original_func(*args_, **kwargs_)
586
587 return wrapper
~/chloe/lib/python3.7/site-packages/hail/table.py in aggregate(self, expr, _localize)
1143
1144 if _localize:
-> 1145 return Env.backend().execute(agg_ir)
1146 else:
1147 return construct_expr(agg_ir, expr.dtype)
~/chloe/lib/python3.7/site-packages/hail/backend/backend.py in execute(self, ir, timed)
107
108 def execute(self, ir, timed=False):
--> 109 result = json.loads(Env.hc()._jhc.backend().executeJSON(self._to_java_ir(ir)))
110 value = ir.typ._from_json(result['value'])
111 timings = result['timings']
~/chloe/lib/python3.7/site-packages/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
~/chloe/lib/python3.7/site-packages/hail/utils/java.py in deco(*args, **kwargs)
223 raise FatalError('%s\n\nJava stack trace:\n%s\n'
224 'Hail version: %s\n'
--> 225 'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
226 except pyspark.sql.utils.CapturedException as e:
227 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: SparkException: Job 12 cancelled because SparkContext was shut down
Java stack trace:
java.lang.RuntimeException: error while applying lowering 'InterpretNonCompilable'
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:26)
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:18)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:18)
at is.hail.expr.ir.CompileAndEvaluate$$anonfun$apply$1.apply(CompileAndEvaluate.scala:16)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:69)
at is.hail.expr.ir.CompileAndEvaluate$.apply(CompileAndEvaluate.scala:14)
at is.hail.backend.Backend$$anonfun$execute$1.apply(Backend.scala:56)
at is.hail.backend.Backend$$anonfun$execute$1.apply(Backend.scala:56)
at is.hail.utils.package$.using(package.scala:596)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:10)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:9)
at is.hail.utils.package$.using(package.scala:596)
at is.hail.annotations.Region$.scoped(Region.scala:18)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:9)
at is.hail.backend.Backend.execute(Backend.scala:56)
at is.hail.backend.Backend.executeJSON(Backend.scala:62)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
org.apache.spark.SparkException: Job 12 cancelled because SparkContext was shut down
at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:932)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:930)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:78)
at org.apache.spark.scheduler.DAGScheduler.cleanUpAfterSchedulerStop(DAGScheduler.scala:930)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onStop(DAGScheduler.scala:2128)
at org.apache.spark.util.EventLoop.stop(EventLoop.scala:84)
at org.apache.spark.scheduler.DAGScheduler.stop(DAGScheduler.scala:2041)
at org.apache.spark.SparkContext$$anonfun$stop$6.apply$mcV$sp(SparkContext.scala:1949)
at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1340)
at org.apache.spark.SparkContext.stop(SparkContext.scala:1948)
at org.apache.spark.SparkContext$$anonfun$2.apply$mcV$sp(SparkContext.scala:575)
at org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:216)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1945)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply$mcV$sp(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178)
at org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:54)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2158)
at is.hail.rvd.RVD.combine(RVD.scala:605)
at is.hail.expr.ir.Interpret$.run(Interpret.scala:618)
at is.hail.expr.ir.Interpret$.alreadyLowered(Interpret.scala:54)
at is.hail.expr.ir.InterpretNonCompilable$.interpretAndCoerce$1(InterpretNonCompilable.scala:16)
at is.hail.expr.ir.InterpretNonCompilable$.is$hail$expr$ir$InterpretNonCompilable$$rewrite$1(InterpretNonCompilable.scala:53)
at is.hail.expr.ir.InterpretNonCompilable$.apply(InterpretNonCompilable.scala:58)
at is.hail.expr.ir.lowering.InterpretNonCompilablePass$.transform(LoweringPass.scala:48)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:13)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:13)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:69)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:13)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:11)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:69)
at is.hail.expr.ir.lowering.LoweringPass$class.apply(LoweringPass.scala:11)
at is.hail.expr.ir.lowering.InterpretNonCompilablePass$.apply(LoweringPass.scala:43)
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:20)
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:18)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:18)
at is.hail.expr.ir.CompileAndEvaluate$$anonfun$apply$1.apply(CompileAndEvaluate.scala:16)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:69)
at is.hail.expr.ir.CompileAndEvaluate$.apply(CompileAndEvaluate.scala:14)
at is.hail.backend.Backend$$anonfun$execute$1.apply(Backend.scala:56)
at is.hail.backend.Backend$$anonfun$execute$1.apply(Backend.scala:56)
at is.hail.utils.package$.using(package.scala:596)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:10)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:9)
at is.hail.utils.package$.using(package.scala:596)
at is.hail.annotations.Region$.scoped(Region.scala:18)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:9)
at is.hail.backend.Backend.execute(Backend.scala:56)
at is.hail.backend.Backend.executeJSON(Backend.scala:62)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Hail version: 0.2.30-2ae07d872f43
Error summary: SparkException: Job 12 cancelled because SparkContext was shut down
----------------------------------------
Exception happened during processing of request from ('127.0.0.1', 57752)
Traceback (most recent call last):
File "/usr/lib/python3.7/socketserver.py", line 316, in _handle_request_noblock
self.process_request(request, client_address)
File "/usr/lib/python3.7/socketserver.py", line 347, in process_request
self.finish_request(request, client_address)
File "/usr/lib/python3.7/socketserver.py", line 360, in finish_request
self.RequestHandlerClass(request, client_address, self)
File "/usr/lib/python3.7/socketserver.py", line 720, in __init__
self.handle()
File "/home/ch283/chloe/lib/python3.7/site-packages/pyspark/accumulators.py", line 269, in handle
poll(accum_updates)
File "/home/ch283/chloe/lib/python3.7/site-packages/pyspark/accumulators.py", line 241, in poll
if func():
File "/home/ch283/chloe/lib/python3.7/site-packages/pyspark/accumulators.py", line 245, in accum_updates
num_updates = read_int(self.rfile)
File "/home/ch283/chloe/lib/python3.7/site-packages/pyspark/serializers.py", line 717, in read_int
raise EOFError
EOFError
----------------------------------------
I can't really make head or tail of this, so any insight would be appreciated! For context, I am now running on a cluster with Ubuntu 19.10.