Hi! I'm brand new to Hail. I'm trying to work through the "01-genome-wide-association-study" tutorial notebook, but I'm stuck on the hl.utils.get_1kg('data/') call. For reference, the cells I run before the failure look roughly like the sketch below (I may be paraphrasing the setup cell slightly, but get_1kg is called exactly as shown):
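    import hail as hl

    # Standard tutorial setup, running local-mode Spark on my laptop
    hl.init()

    # This is the cell that fails: download the 1KG subset and write it as a matrix table
    hl.utils.get_1kg('data/')

Running that last cell produces the following output and error: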
2018-07-15 22:23:13 Hail: INFO: downloading 1KG VCF ...
Source: https://storage.googleapis.com/hail-tutorial/1kg.vcf.bgz
2018-07-15 22:23:14 Hail: INFO: importing VCF and writing to matrix table...
---------------------------------------------------------------------------
FatalError Traceback (most recent call last)
<ipython-input-3-414286e92795> in <module>()
----> 1 hl.utils.get_1kg('data/')
~/hail/python/hail/utils/tutorial.py in get_1kg(output_dir, overwrite)
66 cluster_readable_vcf = Env.jutils().copyToTmp(jhc, local_path_uri(tmp_vcf), 'vcf')
67 info('importing VCF and writing to matrix table...')
---> 68 hl.import_vcf(cluster_readable_vcf, min_partitions=16).write(matrix_table_path, overwrite=True)
69
70 tmp_annot = os.path.join(tmp_dir, '1kg_annotations.txt')
~/hail/python/hail/typecheck/check.py in wrapper(*args, **kwargs)
545 def wrapper(*args, **kwargs):
546 args_, kwargs_ = check_all(f, args, kwargs, checkers, is_method=is_method)
--> 547 return f(*args_, **kwargs_)
548
549 update_wrapper(wrapper, f)
~/hail/python/hail/methods/impex.py in import_vcf(path, force, force_bgz, header_file, min_partitions, drop_samples, call_fields, reference_genome, contig_recoding, array_elements_required, skip_invalid_loci)
1799 joption(min_partitions), drop_samples, jset_args(call_fields),
1800 joption(rg), joption(contig_recoding), array_elements_required,
-> 1801 skip_invalid_loci)
1802
1803 return MatrixTable(jmt)
/usr/local/spark-2.2.0-bin-hadoop2.7/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:
~/hail/python/hail/utils/java.py in deco(*args, **kwargs)
194 raise FatalError('%s\n\nJava stack trace:\n%s\n'
195 'Hail version: %s\n'
--> 196 'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
197 except pyspark.sql.utils.CapturedException as e:
198 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: UnsatisfiedLinkError: is.hail.annotations.Region.nativeCtor()V
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 1.0 failed 1 times, most recent failure: Lost task 2.0 in stage 1.0 (TID 3, localhost, executor driver): java.lang.UnsatisfiedLinkError: is.hail.annotations.Region.nativeCtor()V
at is.hail.annotations.Region.nativeCtor(Native Method)
at is.hail.annotations.Region.<init>(Region.scala:35)
at is.hail.annotations.Region$.apply(Region.scala:15)
at is.hail.rvd.RVDContext$.default(RVDContext.scala:8)
at is.hail.rvd.package$RVDContextIsPointed$.point(package.scala:8)
at is.hail.rvd.package$RVDContextIsPointed$.point(package.scala:6)
at is.hail.sparkextras.ContextRDD$Weaken$$anonfun$apply$4.apply(ContextRDD.scala:64)
at is.hail.sparkextras.ContextRDD$Weaken$$anonfun$apply$4.apply(ContextRDD.scala:64)
at is.hail.sparkextras.ContextRDD.is$hail$sparkextras$ContextRDD$$sparkManagedContext(ContextRDD.scala:129)
at is.hail.sparkextras.ContextRDD$$anonfun$run$1.apply(ContextRDD.scala:138)
at is.hail.sparkextras.ContextRDD$$anonfun$run$1.apply(ContextRDD.scala:137)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2043)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2062)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2087)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.collect(RDD.scala:935)
at is.hail.sparkextras.ContextRDD.collect(ContextRDD.scala:143)
at is.hail.rvd.OrderedRVD$.getPartitionKeyInfo(OrderedRVD.scala:563)
at is.hail.rvd.OrderedRVD$.coerce(OrderedRVD.scala:656)
at is.hail.rvd.OrderedRVD$.coerce(OrderedRVD.scala:640)
at is.hail.io.vcf.LoadVCF$.apply(LoadVCF.scala:911)
at is.hail.HailContext$$anonfun$importVCFs$2.apply(HailContext.scala:641)
at is.hail.HailContext$$anonfun$importVCFs$2.apply(HailContext.scala:639)
at is.hail.HailContext.forceBGZip(HailContext.scala:604)
at is.hail.HailContext.importVCFs(HailContext.scala:639)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:745)java.lang.UnsatisfiedLinkError: is.hail.annotations.Region.nativeCtor()V
at is.hail.annotations.Region.nativeCtor(Native Method)
at is.hail.annotations.Region.<init>(Region.scala:35)
at is.hail.annotations.Region$.apply(Region.scala:15)
at is.hail.rvd.RVDContext$.default(RVDContext.scala:8)
at is.hail.rvd.package$RVDContextIsPointed$.point(package.scala:8)
at is.hail.rvd.package$RVDContextIsPointed$.point(package.scala:6)
at is.hail.sparkextras.ContextRDD$Weaken$$anonfun$apply$4.apply(ContextRDD.scala:64)
at is.hail.sparkextras.ContextRDD$Weaken$$anonfun$apply$4.apply(ContextRDD.scala:64)
at is.hail.sparkextras.ContextRDD.is$hail$sparkextras$ContextRDD$$sparkManagedContext(ContextRDD.scala:129)
at is.hail.sparkextras.ContextRDD$$anonfun$run$1.apply(ContextRDD.scala:138)
at is.hail.sparkextras.ContextRDD$$anonfun$run$1.apply(ContextRDD.scala:137)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$23.apply(RDD.scala:797)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Hail version: devel-2d767347e1ea
Error summary: UnsatisfiedLinkError: is.hail.annotations.Region.nativeCtor()V