I am trying to load this VCF file (http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/HGSVC2/release/v2.0/integrated_callset/variants_freeze4_sv_insdel_alt.vcf.gz) — note the local file I actually pass below has a slightly different name — using the following command:
ds = hl.import_vcf('/mnt/grid/janowitz/rdata_norepl/sv/pacbio/variants_freeze4_sv_insdel_sym_lilra3.vcf.gz', force_bgz=True, reference_genome='GRCh38')
ds = ds.filter_rows(ds.locus == hl.locus("chr19", 54297003, reference_genome='GRCh38'))
pacbio_entries = ds.entries().to_pandas()
I then see the following error:
FatalError: AssertionError: assertion failed
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 6.0 failed 1 times, most recent failure: Lost task 0.0 in stage 6.0 (TID 5) (bam19.cm.cluster executor driver): java.lang.AssertionError: assertion failed
at scala.Predef$.assert(Predef.scala:208)
at is.hail.annotations.RegionValueBuilder.addBoolean(RegionValueBuilder.scala:223)
at is.hail.io.vcf.VCFLine.parseAddInfo(LoadVCF.scala:973)
at is.hail.io.vcf.LoadVCF$.parseLine(LoadVCF.scala:1439)
at is.hail.io.vcf.LoadVCF$.parseLine(LoadVCF.scala:1315)
at is.hail.io.vcf.MatrixVCFReader.$anonfun$executeGeneric$6(LoadVCF.scala:1753)
at is.hail.io.vcf.MatrixVCFReader.$anonfun$executeGeneric$6$adapted(LoadVCF.scala:1745)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:513)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at is.hail.io.RichContextRDDLong$$anon$3.hasNext(RichContextRDDRegionValue.scala:197)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at is.hail.expr.ir.TableAggregateByKey$$anon$10.hasNext(TableIR.scala:2616)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at is.hail.io.RichContextRDDLong$$anon$3.hasNext(RichContextRDDRegionValue.scala:197)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at is.hail.rvd.RVD$$anon$1.hasNext(RVD.scala:211)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at is.hail.utils.richUtils.RichContextRDD$$anon$1.hasNext(RichContextRDD.scala:71)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:755)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:345)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2253)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2202)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2201)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2201)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1078)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1078)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1078)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2440)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2382)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2371)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:868)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2202)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2223)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2242)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2267)
at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:390)
at org.apache.spark.sql.Dataset.$anonfun$collectToPython$1(Dataset.scala:3519)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:772)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3516)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
java.lang.AssertionError: assertion failed
at scala.Predef$.assert(Predef.scala:208)
at is.hail.annotations.RegionValueBuilder.addBoolean(RegionValueBuilder.scala:223)
at is.hail.io.vcf.VCFLine.parseAddInfo(LoadVCF.scala:973)
at is.hail.io.vcf.LoadVCF$.parseLine(LoadVCF.scala:1439)
at is.hail.io.vcf.LoadVCF$.parseLine(LoadVCF.scala:1315)
at is.hail.io.vcf.MatrixVCFReader.$anonfun$executeGeneric$6(LoadVCF.scala:1753)
at is.hail.io.vcf.MatrixVCFReader.$anonfun$executeGeneric$6$adapted(LoadVCF.scala:1745)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:513)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at is.hail.io.RichContextRDDLong$$anon$3.hasNext(RichContextRDDRegionValue.scala:197)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at is.hail.expr.ir.TableAggregateByKey$$anon$10.hasNext(TableIR.scala:2616)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at is.hail.io.RichContextRDDLong$$anon$3.hasNext(RichContextRDDRegionValue.scala:197)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at is.hail.rvd.RVD$$anon$1.hasNext(RVD.scala:211)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at is.hail.utils.richUtils.RichContextRDD$$anon$1.hasNext(RichContextRDD.scala:71)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:488)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:755)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:345)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:898)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:898)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Hail version: 0.2.67-40d373134612
Error summary: AssertionError: assertion failed