Hi all,
I am currently developing a Python package for GWAS pipelines using Hail. It works fine when I run it in Spark cluster mode. However, when I run Hail in Spark local mode, it throws the following error.
Is it not possible to read Hail matrix tables in Spark local mode?
Stack trace
Traceback (most recent call last):
File "tests/test.py", line 118, in test_use_cohort_from_source
self.assertTrue(expr=ct.output().mt.exists(), msg=f'{ct.output().mt} does not exist!')
File "/home/users/ab904123/piranha_package/piranha/piranha/workflows/target.py", line 223, in exists
return self.spark_state == 'complete'
File "/home/users/ab904123/piranha_package/piranha/piranha/workflows/target.py", line 215, in spark_state
if self.fs.exists(self.completeness_file):
File "/home/users/ab904123/piranha_package/piranha/piranha/bmrn_luigi_ext/config.py", line 186, in wrapped
return fn(*args, **kwargs)
File "/home/users/ab904123/piranha_package/piranha/piranha/workflows/filesystem.py", line 17, in exists
return hl.hadoop_exists(path)
File "/bmrn/apps/hail/0.2.42/python/hail-0.2.42-py3-none-any.egg/hail/utils/hadoop_utils.py", line 128, in hadoop_exists
return Env.fs().exists(path)
File "/bmrn/apps/hail/0.2.42/python/hail-0.2.42-py3-none-any.egg/hail/fs/hadoop_fs.py", line 30, in exists
return self._jfs.exists(path)
File "/bmrn/apps/spark/2.4.5/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/bmrn/apps/hail/0.2.42/python/hail-0.2.42-py3-none-any.egg/hail/backend/spark_backend.py", line 41, in deco
'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
hail.utils.java.FatalError: IllegalArgumentException: null
Java stack trace:
java.lang.IllegalArgumentException: null
at java.util.concurrent.ThreadPoolExecutor.<init>(ThreadPoolExecutor.java:1314)
at java.util.concurrent.ThreadPoolExecutor.<init>(ThreadPoolExecutor.java:1237)
at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:280)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2669)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
at is.hail.io.fs.HadoopFS.fileStatus(HadoopFS.scala:148)
at is.hail.io.fs.FS$class.exists(FS.scala:114)
at is.hail.io.fs.HadoopFS.exists(HadoopFS.scala:57)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)