I am trying to import a .csv file of sample annotations for the 1000 Genomes Project. However, when I run this code:
table = (hl.import_table('gs://1k-genome/1000-genomes/other/sample_info/sample_info.csv', impute = True, delimiter=',', missing=''))
I get the following error:
FatalError Traceback (most recent call last)
in ()
4 #table = hl.import_table('gs://1k-genome/1000-genomes/other/sample_info/sample_info.csv', key_by=('Sample'))
5
----> 6 table = (hl.import_table('gs://1k-genome/1000-genomes/other/sample_info/sample_info.csv', impute = True, delimiter=',', missing=''))
/home/hail/hail.zip/hail/typecheck/check.py in wrapper(*args, **kwargs)
545 def wrapper(*args, **kwargs):
546 args_, kwargs_ = check_all(f, args, kwargs, checkers, is_method=is_method)
--> 547 return f(*args_, **kwargs_)
548
549 update_wrapper(wrapper, f)
/home/hail/hail.zip/hail/methods/impex.py in import_table(paths, key, min_partitions, impute, no_header, comment, delimiter, missing, types, quote, skip_blank_lines, force_bgz)
1222 jt = Env.hc()._jhc.importTable(paths, key, min_partitions, jtypes, comment,
1223 delimiter, missing, no_header, impute, quote,
-> 1224 skip_blank_lines, force_bgz)
1225 return Table(jt)
1226
/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:
/home/hail/hail.zip/hail/utils/java.py in deco(*args, **kwargs)
194 raise FatalError('%s\n\nJava stack trace:\n%s\n'
195 'Hail version: %s\n'
--> 196 'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
197 except pyspark.sql.utils.CapturedException as e:
198 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: HailException: expected 62 fields, but found 63
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 11.0 failed 20 times, most recent failure: Lost task 0.19 in stage 11.0 (TID 182, nneka-w-0.c.avl-hail-ines.internal, executor 2): is.hail.utils.HailException: sample_info.csv: expected 62 fields, but found 63
offending line: "HG00403","SH001","CHS","Southern Han Chinese, China","male"...
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:17)
at is.hail.utils.package$.fatal(package.scala:26)
at is.hail.utils.Context.wrapException(Context.scala:19)
at is.hail.utils.WithContext.foreach(Context.scala:43)
at is.hail.utils.TextTableReader$$anonfun$5.apply(TextTableReader.scala:116)
at is.hail.utils.TextTableReader$$anonfun$5.apply(TextTableReader.scala:115)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.aggregate(TraversableOnce.scala:214)
at scala.collection.AbstractIterator.aggregate(Iterator.scala:1336)
at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1$$anonfun$24.apply(RDD.scala:1136)
at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1$$anonfun$24.apply(RDD.scala:1136)
at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1$$anonfun$25.apply(RDD.scala:1137)
at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1$$anonfun$25.apply(RDD.scala:1137)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: is.hail.utils.HailException: expected 62 fields, but found 63
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:6)
at is.hail.utils.package$.fatal(package.scala:26)
at is.hail.utils.TextTableReader$$anonfun$5$$anonfun$apply$2.apply(TextTableReader.scala:119)
at is.hail.utils.TextTableReader$$anonfun$5$$anonfun$apply$2.apply(TextTableReader.scala:116)
at is.hail.utils.WithContext.foreach(Context.scala:41)
... 25 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1517)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1505)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1504)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1504)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1732)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1687)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1676)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2029)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
at org.apache.spark.rdd.RDD$$anonfun$reduce$1.apply(RDD.scala:1026)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.reduce(RDD.scala:1008)
at org.apache.spark.rdd.RDD$$anonfun$treeAggregate$1.apply(RDD.scala:1151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.treeAggregate(RDD.scala:1128)
at is.hail.utils.TextTableReader$.imputeTypes(TextTableReader.scala:115)
at is.hail.utils.TextTableReader$.read(TextTableReader.scala:217)
at is.hail.HailContext$$anonfun$importTables$3.apply(HailContext.scala:466)
at is.hail.HailContext$$anonfun$importTables$3.apply(HailContext.scala:466)
at is.hail.HailContext.forceBGZip(HailContext.scala:568)
at is.hail.HailContext.importTables(HailContext.scala:465)
at is.hail.HailContext.importTable(HailContext.scala:427)
at sun.reflect.GeneratedMethodAccessor88.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Hail version: devel-aa83f2a1d041
Error summary: HailException: expected 62 fields, but found 63
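
Looking at the offending line, the field "Southern Han Chinese, China" contains a comma inside the quotes, so I suspect the parser is splitting on that comma and producing the extra 63rd field. The import_table signature shown in the stack trace includes a quote parameter, so I'm assuming something like this (untested) would tell the parser to respect quoted fields:

import hail as hl

# Assumption: passing quote='"' makes import_table treat '"'-quoted fields
# as atomic, so the comma inside "Southern Han Chinese, China" is no longer
# interpreted as a field delimiter.
table = hl.import_table('gs://1k-genome/1000-genomes/other/sample_info/sample_info.csv',
                        impute=True, delimiter=',', missing='', quote='"')

Is this the right way to handle commas inside quoted fields, or is there a recommended approach?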