NOTE: it seems a recent update to Hail has solved this problem. If anyone wants to comment, or can tell me how to delete this post, please do so.
------ START OUTDATED POST ------
I’ve been trying to get Hail working in an HPC environment, with the hope of having multiple users work on Hail at the same time over the same shared filesystem. My design uses a central code and library repository, with a $CODE_HOME/hail/ install and a $CODE_HOME/miniconda/ Python installation that all users’ PATHs point to. This worked fine for both interactive and spark-submit use with a single user, but today, when I was testing with multiple users, the HailContext would intermittently fail to form on the call hc = HailContext(), with one of two errors.

Note that each user today was ssh’ed into a different node and we were all using separate Jupyter notebooks simultaneously. There were five of us, and every time we all tried to start a HailContext, at least one of us would fail with these errors; most of the time all five of us would fail. Also note that concurrent calls to plain Python were fine, and from hail import * worked without issue.
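To be concrete, the minimal reproduction (each of us running this simultaneously in a separate Jupyter notebook on a separate node, against the shared installs described above) is just:

    from hail import *   # this import alone always succeeds, even concurrently

    hc = HailContext()   # intermittently fails when several of us run it at once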
Any help at all would be wonderful. We would really like to work collaboratively on the cluster at the same time, all referencing the same Hail and Python installations, so we can keep our code synchronized.
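In case it’s relevant, a quick sanity check like the sketch below (the expected paths are just the shared layout described above) confirms that each session resolves the same shared installations:

    import os, sys

    print(sys.executable)                # expect $CODE_HOME/miniconda/bin/python
    print(os.environ.get("SPARK_HOME"))  # expect the shared spark-2.1.0 install
    import hail
    print(hail.__file__)                 # expect a path under $CODE_HOME/hail/python/

This comes back identical for every user, so we don’t think anyone is picking up a stray local install.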
The first error we would get was:
-------------------------------------------------------------------------
OSError Traceback (most recent call last)
<ipython-input-11-2841f1963bb0> in <module>()
----> 1 hc_rav = HailContext()
/scratch/PI/dpwall/computeEnvironments/hail/python/hail/context.pyc in __init__(self, sc, appName, master, local, log, quiet, append, parquet_compression, min_block_size, branching_factor, tmp_dir)
45
46 from pyspark import SparkContext
---> 47 SparkContext._ensure_initialized()
48
49 self._gateway = SparkContext._gateway
/share/sw/free/spark.2.1.0/spark-2.1.0-bin-hadoop2.7/python/pyspark/context.py in _ensure_initialized(cls, instance, gateway, conf)
254 with SparkContext._lock:
255 if not SparkContext._gateway:
--> 256 SparkContext._gateway = gateway or launch_gateway(conf)
257 SparkContext._jvm = SparkContext._gateway.jvm
258
/share/sw/free/spark.2.1.0/spark-2.1.0-bin-hadoop2.7/python/pyspark/java_gateway.py in launch_gateway(conf)
75 def preexec_func():
76 signal.signal(signal.SIGINT, signal.SIG_IGN)
---> 77 proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
78 else:
79 # preexec_fn not supported on Windows
/scratch/PI/dpwall/computeEnvironments/miniconda2/lib/python2.7/subprocess.pyc in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags)
388 p2cread, p2cwrite,
389 c2pread, c2pwrite,
--> 390 errread, errwrite)
391 except Exception:
392 # Preserve original exception in case os.close raises.
/scratch/PI/dpwall/computeEnvironments/miniconda2/lib/python2.7/subprocess.pyc in _execute_child(self, args, executable, preexec_fn, close_fds, cwd, env, universal_newlines, startupinfo, creationflags, shell, to_close, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite)
1022 raise
1023 child_exception = pickle.loads(data)
-> 1024 raise child_exception
1025
1026
OSError: [Errno 2] No such file or directory
The second error we would get was:
-------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-6-93fa734a63bb> in <module>()
----> 1 hc_nate = HailContext()
/scratch/PI/dpwall/computeEnvironments/hail/python/hail/context.pyc in __init__(self, sc, appName, master, local, log, quiet, append, parquet_compression, min_block_size, branching_factor, tmp_dir)
60 self._jhc = scala_object(self._hail, 'HailContext').apply(
61 jsc, appName, joption(master), local, log, quiet, append,
---> 62 parquet_compression, min_block_size, branching_factor, tmp_dir)
63
64 self._jsc = self._jhc.sc()
/share/sw/free/spark.2.1.0/spark-2.1.0-bin-hadoop2.7/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:
/share/sw/free/spark.2.1.0/spark-2.1.0-bin-hadoop2.7/python/pyspark/sql/utils.py in deco(*a, **kw)
61 def deco(*a, **kw):
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
65 s = e.java_exception.toString()
/share/sw/free/spark.2.1.0/spark-2.1.0-bin-hadoop2.7/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
317 raise Py4JJavaError(
318 "An error occurred while calling {0}{1}{2}.\n".
--> 319 format(target_id, ".", name), value)
320 else:
321 raise Py4JError(
Py4JJavaError: An error occurred while calling o68.apply.
: org.apache.spark.SparkException: Only one SparkContext may be running in this JVM (see SPARK-2243). To ignore this error, set spark.driver.allowMultipleContexts = true. The currently running SparkContext was created at:
org.apache.spark.SparkContext.<init>(SparkContext.scala:76)
is.hail.HailContext$.configureAndCreateSparkContext(HailContext.scala:84)
is.hail.HailContext$.apply(HailContext.scala:164)
sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
java.lang.reflect.Method.invoke(Method.java:498)
py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
py4j.Gateway.invoke(Gateway.java:280)
py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
py4j.commands.CallCommand.execute(CallCommand.java:79)
py4j.GatewayConnection.run(GatewayConnection.java:214)
java.lang.Thread.run(Thread.java:745)
at org.apache.spark.SparkContext$$anonfun$assertNoOtherContextIsRunning$2.apply(SparkContext.scala:2278)
at org.apache.spark.SparkContext$$anonfun$assertNoOtherContextIsRunning$2.apply(SparkContext.scala:2274)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.SparkContext$.assertNoOtherContextIsRunning(SparkContext.scala:2274)
at org.apache.spark.SparkContext$.markPartiallyConstructed(SparkContext.scala:2353)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:85)
at is.hail.HailContext$.configureAndCreateSparkContext(HailContext.scala:84)
at is.hail.HailContext$.apply(HailContext.scala:164)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:745)
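In case it helps: the second error’s message suggests setting spark.driver.allowMultipleContexts = true. I haven’t tried this, but I assume it would look something like the sketch below, building the SparkContext ourselves and handing it to HailContext through the sc parameter that appears in the traceback’s signature (the app name is just a placeholder):

    from pyspark import SparkConf, SparkContext
    from hail import HailContext

    # Untested sketch: set the flag the Spark error message mentions,
    # then pass the resulting SparkContext to HailContext via sc.
    conf = (SparkConf()
            .setAppName("hail-shared")  # placeholder app name
            .set("spark.driver.allowMultipleContexts", "true"))
    sc = SparkContext(conf=conf)
    hc = HailContext(sc=sc)

I’m not sure multiple contexts in one JVM is actually safe, though, so pointers on whether that’s the right direction would also be appreciated.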
Thank you so much!!