I used the following lines of code:
vds = hl.sample_qc(vds)
vds.describe()
And I got the following error:
---------------------------------------------------------------------------
FatalError Traceback (most recent call last)
<ipython-input-450-39773d1a4878> in <module>()
----> 1 vds = hl.sample_qc(vds)
2 vds.describe()
/home/hail/hail.zip/hail/typecheck/check.py in wrapper(*args, **kwargs)
545 def wrapper(*args, **kwargs):
546 args_, kwargs_ = check_all(f, args, kwargs, checkers, is_method=is_method)
--> 547 return f(*args_, **kwargs_)
548
549 update_wrapper(wrapper, f)
/home/hail/hail.zip/hail/methods/qc.py in sample_qc(dataset, name)
89 """
90
---> 91 return MatrixTable(Env.hail().methods.SampleQC.apply(require_biallelic(dataset, 'sample_qc')._jvds, name))
92
93
/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:
/home/hail/hail.zip/hail/utils/java.py in deco(*args, **kwargs)
194 raise FatalError('%s\n\nJava stack trace:\n%s\n'
195 'Hail version: %s\n'
--> 196 'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
197 except pyspark.sql.utils.CapturedException as e:
198 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: HailException: invalid ploidy: 1. Only support ploidy == 2
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 8841 in stage 249.0 failed 20 times, most recent failure: Lost task 8841.19 in stage 249.0 (TID 30935, nneka-w-1.c.avl-hail-ines.internal, executor 14): is.hail.utils.HailException: invalid ploidy: 1. Only support ploidy == 2
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:6)
at is.hail.utils.package$.fatal(package.scala:26)
at is.hail.variant.Call$.allelePair(Call.scala:100)
at is.hail.methods.SampleQC$$anonfun$results$1$$anonfun$apply$1.apply(SampleQC.scala:188)
at is.hail.methods.SampleQC$$anonfun$results$1$$anonfun$apply$1.apply(SampleQC.scala:175)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at is.hail.methods.SampleQC$$anonfun$results$1.apply(SampleQC.scala:175)
at is.hail.methods.SampleQC$$anonfun$results$1.apply(SampleQC.scala:170)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitions$1$$anonfun$apply$18.apply(ContextRDD.scala:291)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitions$1$$anonfun$apply$18.apply(ContextRDD.scala:291)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitions$1$$anonfun$apply$18$$anonfun$apply$19.apply(ContextRDD.scala:291)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitions$1$$anonfun$apply$18$$anonfun$apply$19.apply(ContextRDD.scala:291)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:438)
at org.apache.spark.rdd.RDD$$anonfun$treeReduce$1$$anonfun$17.apply(RDD.scala:1041)
at org.apache.spark.rdd.RDD$$anonfun$treeReduce$1$$anonfun$17.apply(RDD.scala:1040)
at org.apache.spark.rdd.RDD$$anonfun$treeReduce$1$$anonfun$18.apply(RDD.scala:1047)
at org.apache.spark.rdd.RDD$$anonfun$treeReduce$1$$anonfun$18.apply(RDD.scala:1047)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1517)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1505)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1504)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1504)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1732)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1687)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1676)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2029)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1026)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.reduce(RDD.scala:1008)
at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1.apply(RDD.scala:1151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.treeAggregate(RDD.scala:1128)
at org.apache.spark.rdd.RDD$$anonfun$treeReduce$1.apply(RDD.scala:1059)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.treeReduce(RDD.scala:1037)
at is.hail.methods.SampleQC$.results(SampleQC.scala:207)
at is.hail.methods.SampleQC$.apply(SampleQC.scala:222)
at is.hail.methods.SampleQC.apply(SampleQC.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)is.hail.utils.HailException: invalid ploidy: 1. Only support ploidy == 2
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:6)
at is.hail.utils.package$.fatal(package.scala:26)
at is.hail.variant.Call$.allelePair(Call.scala:100)
at is.hail.methods.SampleQC$$anonfun$results$1$$anonfun$apply$1.apply(SampleQC.scala:188)
at is.hail.methods.SampleQC$$anonfun$results$1$$anonfun$apply$1.apply(SampleQC.scala:175)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at is.hail.methods.SampleQC$$anonfun$results$1.apply(SampleQC.scala:175)
at is.hail.methods.SampleQC$$anonfun$results$1.apply(SampleQC.scala:170)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitions$1$$anonfun$apply$18.apply(ContextRDD.scala:291)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitions$1$$anonfun$apply$18.apply(ContextRDD.scala:291)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitions$1$$anonfun$apply$18$$anonfun$apply$19.apply(ContextRDD.scala:291)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitions$1$$anonfun$apply$18$$anonfun$apply$19.apply(ContextRDD.scala:291)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:438)
at org.apache.spark.rdd.RDD$$anonfun$treeReduce$1$$anonfun$17.apply(RDD.scala:1041)
at org.apache.spark.rdd.RDD$$anonfun$treeReduce$1$$anonfun$17.apply(RDD.scala:1040)
at org.apache.spark.rdd.RDD$$anonfun$treeReduce$1$$anonfun$18.apply(RDD.scala:1047)
at org.apache.spark.rdd.RDD$$anonfun$treeReduce$1$$anonfun$18.apply(RDD.scala:1047)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:96)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Hail version: devel-aa83f2a1d041
Error summary: HailException: invalid ploidy: 1. Only support ploidy == 2
When I run this code on a subset of the MatrixTable (18,000 variants, 50 samples) I do not get an error.
This error is related to deficiencies in the sample_qc method. It’s on deck to be rewritten in Python and adjusted slightly, which will make it very easy to fix this problem. I’ve made an issue to track: https://github.com/hail-is/hail/issues/3900
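One possible workaround until then is to check for and drop non-diploid calls before running sample_qc. A minimal sketch, assuming the genotype field has the default name GT:

# Sketch: count non-diploid calls, then keep only diploid entries before sample_qc
non_diploid = vds.aggregate_entries(hl.agg.count_where(vds.GT.ploidy != 2))
print('non-diploid calls:', non_diploid)
vds = vds.filter_entries(vds.GT.ploidy == 2)
vds = hl.sample_qc(vds)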
@tpoterba
Would this error be due to the same problem:
from math import isnan, log
from pprint import pprint
import numpy as np
from bokeh.plotting import figure, show  # plotting functions assumed to come from bokeh

def qqplot(pvals):
    # Drop missing/NaN p-values and sort ascending
    spvals = sorted(filter(lambda x: x and not isnan(x), pvals))
    exp = [-log(float(i) / len(spvals), 10) for i in np.arange(1, len(spvals) + 1, 1)]
    obs = [-log(p, 10) for p in spvals]
    p = figure(title="Q-Q Plot",
               x_axis_label='Expected P-Value (-log10 scale)',
               y_axis_label='Observed P-Value (-log10 scale)')
    p.scatter(x=exp, y=obs, color='black')
    bound = max(max(exp), max(obs)) * 1.1
    p.line([0, bound], [0, bound], color='red')
    show(p)

qqplot(gwas_logistic.logreg.p_value.collect())
pprint(gwas_logistic.row.logreg.p_value.collect())
The error is:
---------------------------------------------------------------------------
FatalError Traceback (most recent call last)
<ipython-input-163-4847c0afde13> in <module>()
12 #qqplot(gwas_logistic.logreg.p_value.collect())
13
---> 14 pprint(gwas_logistic.row.logreg.p_value.collect())
/home/hail/hail.zip/hail/expr/expressions/base_expression.py in collect(self)
763 uid = Env.get_uid()
764 t = self._to_table(uid)
--> 765 return [r[uid] for r in t._select("collect", hl.struct(), hl.struct(**{uid: t[uid]})).collect()]
766
767 @property
/home/hail/hail.zip/hail/table.py in collect(self)
1546 List of rows.
1547 """
-> 1548 return hl.tarray(self.row.dtype)._from_json(self._jt.collectJSON())
1549
1550 def describe(self):
/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:
/home/hail/hail.zip/hail/utils/java.py in deco(*args, **kwargs)
194 raise FatalError('%s\n\nJava stack trace:\n%s\n'
195 'Hail version: %s\n'
--> 196 'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
197 except pyspark.sql.utils.CapturedException as e:
198 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: HailException: Only support ploidy == 2 and unphased. Found 0|0.
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 142.0 failed 20 times, most recent failure: Lost task 1.19 in stage 142.0 (TID 28656, nneka-w-1.c.avl-hail-ines.internal, executor 1): is.hail.utils.HailException: Only support ploidy == 2 and unphased. Found 0|0.
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:6)
at is.hail.utils.package$.fatal(package.scala:26)
at is.hail.variant.Call$.unphasedDiploidGtIndex(Call.scala:111)
at is.hail.methods.VariantQCCombiner.mergeGT(VariantQC.scala:20)
at is.hail.methods.VariantQC$$anonfun$apply$2.apply(VariantQC.scala:147)
at is.hail.methods.VariantQC$$anonfun$apply$2.apply(VariantQC.scala:140)
at is.hail.variant.MatrixTable$$anonfun$73$$anonfun$apply$55$$anonfun$apply$5.apply$mcV$sp(MatrixTable.scala:1551)
at is.hail.expr.types.TStruct$$anonfun$unsafeInsert$1.apply(TStruct.scala:191)
at is.hail.expr.types.TStruct$$anonfun$unsafeInsert$1.apply(TStruct.scala:191)
at is.hail.expr.types.TStruct$$anonfun$unsafeInsert$3.apply(TStruct.scala:238)
at is.hail.expr.types.TStruct$$anonfun$unsafeInsert$3.apply(TStruct.scala:228)
at is.hail.variant.MatrixTable$$anonfun$73$$anonfun$apply$55.apply(MatrixTable.scala:1550)
at is.hail.variant.MatrixTable$$anonfun$73$$anonfun$apply$55.apply(MatrixTable.scala:1546)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
at scala.collection.AbstractIterator.to(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1336)
at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:936)
at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:936)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1517)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1505)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1504)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1504)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1732)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1687)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1676)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2029)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2050)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2069)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2094)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.collect(RDD.scala:935)
at is.hail.table.Table.collect(Table.scala:884)
at is.hail.table.Table.collectJSON(Table.scala:887)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)is.hail.utils.HailException: Only support ploidy == 2 and unphased. Found 0|0.
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:6)
at is.hail.utils.package$.fatal(package.scala:26)
at is.hail.variant.Call$.unphasedDiploidGtIndex(Call.scala:111)
at is.hail.methods.VariantQCCombiner.mergeGT(VariantQC.scala:20)
at is.hail.methods.VariantQC$$anonfun$apply$2.apply(VariantQC.scala:147)
at is.hail.methods.VariantQC$$anonfun$apply$2.apply(VariantQC.scala:140)
at is.hail.variant.MatrixTable$$anonfun$73$$anonfun$apply$55$$anonfun$apply$5.apply$mcV$sp(MatrixTable.scala:1551)
at is.hail.expr.types.TStruct$$anonfun$unsafeInsert$1.apply(TStruct.scala:191)
at is.hail.expr.types.TStruct$$anonfun$unsafeInsert$1.apply(TStruct.scala:191)
at is.hail.expr.types.TStruct$$anonfun$unsafeInsert$3.apply(TStruct.scala:238)
at is.hail.expr.types.TStruct$$anonfun$unsafeInsert$3.apply(TStruct.scala:228)
at is.hail.variant.MatrixTable$$anonfun$73$$anonfun$apply$55.apply(MatrixTable.scala:1550)
at is.hail.variant.MatrixTable$$anonfun$73$$anonfun$apply$55.apply(MatrixTable.scala:1546)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:927)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:921)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
at scala.collection.AbstractIterator.to(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1336)
at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:936)
at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:936)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Hail version: devel-aa83f2a1d041
Error summary: HailException: Only support ploidy == 2 and unphased. Found 0|0.
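One possible workaround for phased genotypes like 0|0 is to rewrite them as unphased diploid calls before running the QC and regression steps. A minimal sketch, assuming mt is the MatrixTable upstream of variant_qc and its genotype field is the default GT:

# Sketch: rebuild each diploid call without phasing using hl.call(..., phased=False)
mt = mt.annotate_entries(GT=hl.call(mt.GT[0], mt.GT[1], phased=False))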
That’s erroring out in variant_qc, which should work if you update to the latest version.
By latest version, do you mean Hail 0.2?
The latest version of Hail 0.2. You can get it here: https://hail.is/docs/devel/getting_started.html
I’d recommend updating every week or so.
@tpoterba
When I updated to the latest version of Hail 0.2, my MatrixTable took over 30 minutes to import, and now it is taking even longer. I am using the following lines of code:
## reading in phase 1
vds = hl.read_matrix_table('gs://1k-genome/1000-genomes/VDS-of-all/ALL.chr.integrated_phase1_v3.20101123.snps_indels_svs.genotypes.mt')
print('The counts are: ', vds.count())
Those two lines are taking 30 minutes? Something seems wrong, then; that should be almost instantaneous.
Much of the time it works quickly, sometimes it is very slow, and sometimes it does not finish at all.
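To narrow down where the time goes, one option (a sketch, not a prescribed diagnosis) is to time the read and the count separately and check how the table is partitioned; read_matrix_table itself is lazy, so the count is what triggers the Spark job:

import time

t0 = time.time()
vds = hl.read_matrix_table('gs://1k-genome/1000-genomes/VDS-of-all/ALL.chr.integrated_phase1_v3.20101123.snps_indels_svs.genotypes.mt')
print('read took %.1fs, %d partitions' % (time.time() - t0, vds.n_partitions()))

t0 = time.time()
print('rows:', vds.count_rows(), 'cols:', vds.count_cols())
print('count took %.1fs' % (time.time() - t0))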