Jupyter/Windows Setup Help


#1

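Hey everyone, I'm trying to get Hail set up on Windows (running in Jupyter), but I'm hitting the following error when importing a VCF and writing it out as a MatrixTable: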


FatalError Traceback (most recent call last)
in ()
----> 1 hl.import_vcf("gnomad.exomes.r2.1.1.sites.21.vcf.bgz").write('gnomad.exomes.r2.1.1.sites.21.mt', overwrite = True)

in write(self, output, overwrite, stage_locally, _codec_spec)

~\Anaconda3\lib\site-packages\hail\typecheck\check.py in wrapper(__original_func, *args, **kwargs)
559 def wrapper(__original_func, *args, **kwargs):
560 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 561 return __original_func(*args_, **kwargs_)
562
563 return wrapper

~\Anaconda3\lib\site-packages\hail\matrixtable.py in write(self, output, overwrite, stage_locally, _codec_spec)
2440
2441 writer = MatrixNativeWriter(output, overwrite, stage_locally, _codec_spec)
-> 2442 Env.backend().execute(MatrixWrite(self._mir, writer))
2443
2444 def globals_table(self) -> Table:

~\Anaconda3\lib\site-packages\hail\backend\backend.py in execute(self, ir)
92 return ir.typ._from_json(
93 Env.hail().expr.ir.CompileAndEvaluate.evaluateToJSON(
---> 94 self._to_java_ir(ir)))
95
96 def value_type(self, ir):

~\Anaconda3\lib\site-packages\py4j\java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:

~\Anaconda3\lib\site-packages\hail\utils\java.py in deco(*args, **kwargs)
225 raise FatalError('%s\n\nJava stack trace:\n%s\n'
226 'Hail version: %s\n'
--> 227 'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
228 except pyspark.sql.utils.CapturedException as e:
229 raise FatalError('%s\n\nJava stack trace:\n%s\n'

FatalError: SparkException: Job 1 cancelled because SparkContext was shut down

Java stack trace:
org.apache.spark.SparkException: Job 1 cancelled because SparkContext was shut down
at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:809)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$cleanUpAfterSchedulerStop$1.apply(DAGScheduler.scala:807)
at scala.collection.mutable.HashSet.foreach(HashSet.scala:78)
at org.apache.spark.scheduler.DAGScheduler.cleanUpAfterSchedulerStop(DAGScheduler.scala:807)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onStop(DAGScheduler.scala:1808)
at org.apache.spark.util.EventLoop.stop(EventLoop.scala:83)
at org.apache.spark.scheduler.DAGScheduler.stop(DAGScheduler.scala:1727)
at org.apache.spark.SparkContext$$anonfun$stop$8.apply$mcV$sp(SparkContext.scala:1928)
at org.apache.spark.util.Utils$.tryLogNonFatalError(Utils.scala:1320)
at org.apache.spark.SparkContext.stop(SparkContext.scala:1927)
at org.apache.spark.SparkContext$$anonfun$2.apply$mcV$sp(SparkContext.scala:581)
at org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:216)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:188)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1951)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply$mcV$sp(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:188)
at scala.util.Try$.apply(Try.scala:192)
at org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:188)
at org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:178)
at org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:54)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:619)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2029)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2050)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2069)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2094)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:944)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.collect(RDD.scala:943)
at is.hail.sparkextras.ContextRDD.collect(ContextRDD.scala:196)
at is.hail.rvd.RVD$.getKeyInfo(RVD.scala:1048)
at is.hail.rvd.RVD$.makeCoercer(RVD.scala:1112)
at is.hail.io.vcf.MatrixVCFReader.coercer$lzycompute(LoadVCF.scala:1089)
at is.hail.io.vcf.MatrixVCFReader.coercer(LoadVCF.scala:1089)
at is.hail.io.vcf.MatrixVCFReader.apply(LoadVCF.scala:1117)
at is.hail.expr.ir.TableRead.execute(TableIR.scala:149)
at is.hail.expr.ir.TableRename.execute(TableIR.scala:1481)
at is.hail.expr.ir.TableMapRows.execute(TableIR.scala:764)
at is.hail.expr.ir.CastTableToMatrix.execute(MatrixIR.scala:1696)
at is.hail.expr.ir.Interpret$.apply(Interpret.scala:763)
at is.hail.expr.ir.Interpret$.apply(Interpret.scala:86)
at is.hail.expr.ir.CompileAndEvaluate$.apply(CompileAndEvaluate.scala:37)
at is.hail.expr.ir.CompileAndEvaluate$.evaluateToJSON(CompileAndEvaluate.scala:14)
at is.hail.expr.ir.CompileAndEvaluate.evaluateToJSON(CompileAndEvaluate.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Unknown Source)

Hail version: 0.2.11-cf54f08305d1
Error summary: SparkException: Job 1 cancelled because SparkContext was shut down

----------------------------------------
Exception happened during processing of request from ('127.0.0.1', 52624)
Traceback (most recent call last):
File "C:\Users\JDD\Anaconda3\lib\socketserver.py", line 639, in process_request_thread
self.finish_request(request, client_address)
File "C:\Users\JDD\Anaconda3\lib\socketserver.py", line 361, in finish_request
self.RequestHandlerClass(request, client_address, self)
File "C:\Users\JDD\Anaconda3\lib\socketserver.py", line 696, in __init__
self.handle()
File "C:\Users\JDD\Anaconda3\lib\site-packages\hail\utils\java.py", line 262, in handle
for line in self.rfile:
File "C:\Users\JDD\Anaconda3\lib\socket.py", line 586, in readinto
return self._sock.recv_into(b)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
----------------------------------------

Any ideas? Thanks!


#2

Hi there. I’m sorry to report that we haven’t invested effort in making Hail work on Windows, so for now you’re stuck running on Mac or Linux.