I am facing an error while converting my MatrixTable into a pandas DataFrame.
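The cell that fails boils down to something like this (a simplified sketch: the real cell has a few more fields, ds_result7 is my VEP-annotated row table, and the outer call is shown here as a plain select):

```python
# Simplified sketch of the failing cell; the field expressions are the ones
# that appear in the traceback below (the gene= line is line 119 in my notebook).
df = ds_result7.select(
    gene=ds_result7.vep.transcript_consequences[0].gene_symbol,
    n_hets=ds_result7.n_het,
    homs=ds_result7.homs,
    n_homs=ds_result7.n_hom
).to_pandas()
```

The full traceback is: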
FatalError Traceback (most recent call last)
in
121 n_hets=ds_result7.n_het,
122 homs=ds_result7.homs,
---> 123 n_homs=ds_result7.n_hom
124 ).to_pandas()
125
in to_pandas(self, flatten)
/opt/conda/default/lib/python3.6/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
612 def wrapper(__original_func, *args, **kwargs):
613 args, kwargs = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
---> 614 return __original_func(*args, **kwargs)
615
616 return wrapper
/opt/conda/default/lib/python3.6/site-packages/hail/table.py in to_pandas(self, flatten)
3234
3235 """
---> 3236 return Env.spark_backend('to_pandas').to_pandas(self, flatten)
3237
3238 @staticmethod
/opt/conda/default/lib/python3.6/site-packages/hail/backend/spark_backend.py in to_pandas(self, t, flatten)
339
340 def to_pandas(self, t, flatten):
---> 341 return self.to_spark(t, flatten).toPandas()
342
343 def from_pandas(self, df, key):
/usr/lib/spark/python/lib/pyspark.zip/pyspark/sql/dataframe.py in toPandas(self)
2148
2149 # Below is toPandas without Arrow optimization.
---> 2150 pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
2151
2152 dtype = {}
/usr/lib/spark/python/lib/pyspark.zip/pyspark/sql/dataframe.py in collect(self)
532 """
533 with SCCallSiteSync(self._sc) as css:
---> 534 sock_info = self._jdf.collectToPython()
535 return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
536
/usr/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
---> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
/opt/conda/default/lib/python3.6/site-packages/hail/backend/spark_backend.py in deco(*args, **kwargs)
40 raise FatalError('%s\n\nJava stack trace:\n%s\n'
41 'Hail version: %s\n'
---> 42 'Error summary: %s' % (deepest, full, hail.version, deepest)) from None
43 except pyspark.sql.utils.CapturedException as e:
44 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: HailException: array index out of bounds: index=0, length=0
Python traceback:
File "", line 119, in <module>
gene=ds_result7.vep.transcript_consequences[0].gene_symbol,
File "/opt/conda/default/lib/python3.6/site-packages/hail/expr/expressions/typed_expressions.py", line 776, in __getitem__
return super().__getitem__(item)
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 11 in stage 2344.0 failed 20 times, most recent failure: Lost task 11.19 in stage 2344.0 (TID 236455, hailpy600-sw-j76p.us-central1-c.c.cncd-cncd.internal, executor 3441): is.hail.utils.HailException: array index out of bounds: index=0, length=0
Python traceback:
File "", line 119, in <module>
gene=ds_result7.vep.transcript_consequences[0].gene_symbol,
File "/opt/conda/default/lib/python3.6/site-packages/hail/expr/expressions/typed_expressions.py", line 776, in __getitem__
return super().__getitem__(item)
at __C33025Compiled.applyregion0_13(Unknown Source)
at __C33025Compiled.apply(Unknown Source)
at is.hail.expr.ir.TableMapRows$$anonfun$70$$anonfun$apply$3.apply$mcJJ$sp(TableIR.scala:1529)
at is.hail.expr.ir.TableMapRows$$anonfun$70$$anonfun$apply$3.apply(TableIR.scala:1528)
at is.hail.expr.ir.TableMapRows$$anonfun$70$$anonfun$apply$3.apply(TableIR.scala:1528)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:445)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:445)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:445)
at is.hail.utils.richUtils.RichContextRDD$$anonfun$cleanupRegions$1$$anon$1.next(RichContextRDD.scala:74)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:445)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:256)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1892)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1880)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1879)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1879)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:927)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:927)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2113)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2062)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2051)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:738)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:990)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
at org.apache.spark.rdd.RDD.collect(RDD.scala:989)
at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:299)
at org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3263)
at org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:3260)
at org.apache.spark.sql.Dataset$$anonfun$52.apply(Dataset.scala:3370)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:80)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:127)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:75)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3369)
at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3260)
at sun.reflect.GeneratedMethodAccessor79.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
is.hail.utils.HailException: array index out of bounds: index=0, length=0
Python traceback:
File "", line 119, in <module>
gene=ds_result7.vep.transcript_consequences[0].gene_symbol,
File "/opt/conda/default/lib/python3.6/site-packages/hail/expr/expressions/typed_expressions.py", line 776, in __getitem__
return super().__getitem__(item)
at __C33025Compiled.applyregion0_13(Unknown Source)
at __C33025Compiled.apply(Unknown Source)
at is.hail.expr.ir.TableMapRows$$anonfun$70$$anonfun$apply$3.apply$mcJJ$sp(TableIR.scala:1529)
at is.hail.expr.ir.TableMapRows$$anonfun$70$$anonfun$apply$3.apply(TableIR.scala:1528)
at is.hail.expr.ir.TableMapRows$$anonfun$70$$anonfun$apply$3.apply(TableIR.scala:1528)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:445)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:445)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:445)
at is.hail.utils.richUtils.RichContextRDD$$anonfun$cleanupRegions$1$$anon$1.next(RichContextRDD.scala:74)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:445)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:256)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:858)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Hail version: 0.2.57-582b2e31b8bd
Error summary: HailException: array index out of bounds: index=0, length=0
Python traceback:
File "", line 119, in <module>
gene=ds_result7.vep.transcript_consequences[0].gene_symbol,
File "/opt/conda/default/lib/python3.6/site-packages/hail/expr/expressions/typed_expressions.py", line 776, in __getitem__
return super().__getitem__(item)
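From the error summary, it looks like vep.transcript_consequences is empty (length=0) for at least one row, so the [0] index fails when to_pandas() forces evaluation. Would guarding the index along these lines be the right fix, or is there a more idiomatic pattern?

```python
import hail as hl

# Sketch of the guard I'm considering (assuming ds_result7 is a Table, which matches
# the traceback going through hail/table.py): only index into transcript_consequences
# when it is non-empty, otherwise leave gene missing. As far as I understand,
# hl.or_missing compiles to a conditional, so the [0] index is not evaluated for
# rows with an empty array.
tc = ds_result7.vep.transcript_consequences
ds_result8 = ds_result7.annotate(
    gene=hl.or_missing(hl.len(tc) > 0, tc[0].gene_symbol)
)
```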