I am trying to run some code on a Google megamem machine, and right now I am testing the setup process on a standard machine. I am getting an error when I try to use Hail to read in a file stored in a Google bucket.
This is how I start my machine (the JRE archive is used to install Java):
gcloud compute instances create lkptest --machine-type n1-standard-8
gcloud compute scp python_code.py lkptest:.
gcloud compute scp jre-8u271-linux-x64.tar.gz lkptest:.
gcloud compute ssh lkptest
Then I install packages:
sudo apt-get update
sudo apt-get install python3.6
sudo apt-get install python3-pip
pip3 install -U hail
pip3 install numpy scipy matplotlib ipython pandas
export PATH=$PATH:/home/lilllianpetersen/.local/bin
curl https://broad.io/install-gcs-connector | python # To connect a bucket to hail
And I use the following commands to install Java:
mkdir software/java/ -p
mv jre-8u271-linux-x64.tar.gz software/java/
cd software/java/
tar zxvf jre-8u271-linux-x64.tar.gz
cd ~
export JAVA_HOME=~/software/java/jre1.8.0_271
export PATH=$JAVA_HOME/bin:$PATH
Then I use the following code in python to read in a block matrix:
grm = hl.linalg.BlockMatrix.read("gs://ukb-gt/grm_file")
I get the following error after running the python code:
Initializing Hail with default parameters...
2020-12-20 19:24:38 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
2020-12-20 19:24:38 WARN Hail:37 - This Hail JAR was compiled for Spark 2.4.5, running with Spark 2.4.1.
Compatibility is not guaranteed.
Running on Apache Spark version 2.4.1
SparkUI available at http://lkptest.c.ukbb-pca.internal:4040
Welcome to
__ __ <>__
/ /_/ /__ __/ /
/ __ / _ `/ / /
/_/ /_/\_,_/_/_/ version 0.2.61-3c86d3ba497a
LOGGING: writing to /home/lilllianpetersen/hail-20201220-1924-0.2.61-3c86d3ba497a.log
---------------------------------------------------------------------------
FatalError Traceback (most recent call last)
~/megamem_compute_pcas.py in <module>
36 # Convert to numpy
37 print('\n\nCONVERTING GRM TO NUMPY...\n\n')
---> 38 np_grm = grm.to_numpy()
39 print('\n\nSUCCESSFULLY CONVERTED GRM TO NUMPY!\n\n')
40
<decorator-gen-1468> in to_numpy(self, _force_blocking)
~/.local/lib/python3.7/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
612 def wrapper(__original_func, *args, **kwargs):
613 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614 return __original_func(*args_, **kwargs_)
615
616 return wrapper
~/.local/lib/python3.7/site-packages/hail/linalg/blockmatrix.py in to_numpy(self, _force_blocking)
1190 """
1191
-> 1192 if self.n_rows * self.n_cols > 1 << 31 or _force_blocking:
1193 path = new_temp_file()
1194 self.export_blocks(path, binary=True)
~/.local/lib/python3.7/site-packages/hail/linalg/blockmatrix.py in n_rows(self)
523 :obj:`int`
524 """
--> 525 return self.shape[0]
526
527 @property
~/.local/lib/python3.7/site-packages/hail/linalg/blockmatrix.py in shape(self)
552 Number of rows and number of columns.
553 """
--> 554 return tensor_shape_to_matrix_shape(self._bmir)
555
556 @property
~/.local/lib/python3.7/site-packages/hail/ir/blockmatrix_ir.py in tensor_shape_to_matrix_shape(bmir)
379
380 def tensor_shape_to_matrix_shape(bmir):
--> 381 shape = bmir.typ.shape
382 is_row_vector = bmir.typ.is_row_vector
383
~/.local/lib/python3.7/site-packages/hail/ir/base_ir.py in typ(self)
383 def typ(self):
384 if self._type is None:
--> 385 self._compute_type()
386 assert self._type is not None, self
387 return self._type
~/.local/lib/python3.7/site-packages/hail/ir/blockmatrix_ir.py in _compute_type(self)
23
24 def _compute_type(self):
---> 25 self._type = Env.backend().blockmatrix_type(self)
26
27
~/.local/lib/python3.7/site-packages/hail/backend/spark_backend.py in blockmatrix_type(self, bmir)
298
299 def blockmatrix_type(self, bmir):
--> 300 jir = self._to_java_blockmatrix_ir(bmir)
301 return tblockmatrix._from_java(jir.typ())
302
~/.local/lib/python3.7/site-packages/hail/backend/spark_backend.py in _to_java_blockmatrix_ir(self, ir)
268
269 def _to_java_blockmatrix_ir(self, ir):
--> 270 return self._to_java_ir(ir, self._parse_blockmatrix_ir)
271
272 def value_type(self, ir):
~/.local/lib/python3.7/site-packages/hail/backend/spark_backend.py in _to_java_ir(self, ir, parse)
255 r = CSERenderer(stop_at_jir=True)
256 # FIXME parse should be static
--> 257 ir._jir = parse(r(ir), ir_map=r.jirs)
258 return ir._jir
259
~/.local/lib/python3.7/site-packages/hail/backend/spark_backend.py in _parse_blockmatrix_ir(self, code, ref_map, ir_map)
236
237 def _parse_blockmatrix_ir(self, code, ref_map={}, ir_map={}):
--> 238 return self._jbackend.parse_blockmatrix_ir(code, ref_map, ir_map)
239
240 @property
~/.local/lib/python3.7/site-packages/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
~/.local/lib/python3.7/site-packages/hail/backend/py4j_backend.py in deco(*args, **kwargs)
30 raise FatalError('%s\n\nJava stack trace:\n%s\n'
31 'Hail version: %s\n'
---> 32 'Error summary: %s' % (deepest, full, hail.__version__, deepest), error_id) from None
33 except pyspark.sql.utils.CapturedException as e:
34 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: IOException: No FileSystem for scheme: gs
Java stack trace:
java.io.IOException: No FileSystem for scheme: gs
at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2660)
at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2667)
at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94)
at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703)
at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685)
at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373)
at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
at is.hail.io.fs.HadoopFS.getFileSystem(HadoopFS.scala:98)
at is.hail.io.fs.HadoopFS.openNoCompression(HadoopFS.scala:80)
at is.hail.io.fs.FS$class.open(FS.scala:139)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:70)
at is.hail.io.fs.FS$class.open(FS.scala:148)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:70)
at is.hail.linalg.BlockMatrix$.readMetadata(BlockMatrix.scala:167)
at is.hail.expr.ir.BlockMatrixNativeReader$.apply(BlockMatrixIR.scala:125)
at is.hail.expr.ir.BlockMatrixNativeReader$.fromJValue(BlockMatrixIR.scala:132)
at is.hail.expr.ir.BlockMatrixReader$.fromJValue(BlockMatrixIR.scala:101)
at is.hail.expr.ir.IRParser$.blockmatrix_ir1(Parser.scala:1867)
at is.hail.expr.ir.IRParser$$anonfun$blockmatrix_ir$1.apply(Parser.scala:1858)
at is.hail.expr.ir.IRParser$$anonfun$blockmatrix_ir$1.apply(Parser.scala:1858)
at is.hail.utils.StackSafe$More.advance(StackSafe.scala:64)
at is.hail.utils.StackSafe$.run(StackSafe.scala:16)
at is.hail.utils.StackSafe$StackFrame.run(StackSafe.scala:32)
at is.hail.expr.ir.IRParser$$anonfun$parse_blockmatrix_ir$1.apply(Parser.scala:1963)
at is.hail.expr.ir.IRParser$$anonfun$parse_blockmatrix_ir$1.apply(Parser.scala:1963)
at is.hail.expr.ir.IRParser$.parse(Parser.scala:1946)
at is.hail.expr.ir.IRParser$.parse_blockmatrix_ir(Parser.scala:1963)
at is.hail.backend.spark.SparkBackend$$anonfun$parse_blockmatrix_ir$1$$anonfun$apply$24.apply(SparkBackend.scala:614)
at is.hail.backend.spark.SparkBackend$$anonfun$parse_blockmatrix_ir$1$$anonfun$apply$24.apply(SparkBackend.scala:613)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:25)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:23)
at is.hail.utils.package$.using(package.scala:618)
at is.hail.annotations.Region$.scoped(Region.scala:18)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:23)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:247)
at is.hail.backend.spark.SparkBackend$$anonfun$parse_blockmatrix_ir$1.apply(SparkBackend.scala:613)
at is.hail.backend.spark.SparkBackend$$anonfun$parse_blockmatrix_ir$1.apply(SparkBackend.scala:612)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.utils.ExecutionTimer$.logTime(ExecutionTimer.scala:59)
at is.hail.backend.spark.SparkBackend.parse_blockmatrix_ir(SparkBackend.scala:612)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Hail version: 0.2.61-3c86d3ba497a
Error summary: IOException: No FileSystem for scheme: gs
Are my setup steps correct, and how am I supposed to read the file?
Thanks!