I was unable to run the 01-genome-wide-association-study.ipynb notebook from Hail-Notebook-Tutorials. After running the hl.init() command, I received the following output (warnings and startup messages):
/opt/conda/lib/python3.10/site-packages/hailtop/aiocloud/aiogoogle/user_config.py:43: UserWarning: Reading spark-defaults.conf to determine GCS requester pays configuration. This is deprecated. Please use `hailctl config set gcs_requester_pays/project` and `hailctl config set gcs_requester_pays/buckets`.
warnings.warn(
SLF4J: No SLF4J providers were found.
SLF4J: Defaulting to no-operation (NOP) logger implementation
SLF4J: See https://www.slf4j.org/codes.html#noProviders for further details.
SLF4J: Class path contains SLF4J bindings targeting slf4j-api versions 1.7.x or earlier.
SLF4J: Ignoring binding found at [jar:file:/usr/lib/spark/jars/log4j-slf4j-impl-2.18.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See https://www.slf4j.org/codes.html#ignoredBindings for an explanation.
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Running on Apache Spark version 3.3.0
SparkUI available at http://saturn-26616ce0-7b3e-4578-bb69-4567cf490d15-m.c.terra-67a6826d.internal:37893
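Then I got the following error when running hl.utils.get_1kg('data/') to download the 1KG tutorial data (the failure is raised at the VCF import step, after the download message is logged):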
2023-10-03 21:56:25.636 Hail: INFO: downloading 1KG VCF ...
Source: https://storage.googleapis.com/hail-tutorial/1kg.vcf.bgz
2023-10-03 21:56:28.692 Hail: INFO: importing VCF and writing to matrix table...
---------------------------------------------------------------------------
Py4JNetworkError Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/py4j/clientserver.py:516, in ClientServerConnection.send_command(self, command)
515 if answer.strip() == "":
--> 516 raise Py4JNetworkError("Answer from Java side is empty")
517 if answer.startswith(proto.RETURN_MESSAGE):
Py4JNetworkError: Answer from Java side is empty
During handling of the above exception, another exception occurred:
Py4JNetworkError Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/py4j/java_gateway.py:1038, in GatewayClient.send_command(self, command, retry, binary)
1037 try:
-> 1038 response = connection.send_command(command)
1039 if binary:
File /opt/conda/lib/python3.10/site-packages/py4j/clientserver.py:539, in ClientServerConnection.send_command(self, command)
538 logger.info("Error while receiving.", exc_info=True)
--> 539 raise Py4JNetworkError(
540 "Error while sending or receiving", e, proto.ERROR_ON_RECEIVE)
Py4JNetworkError: Error while sending or receiving
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
Cell In[3], line 1
----> 1 hl.utils.get_1kg('data/')
File /opt/conda/lib/python3.10/site-packages/hail/utils/tutorial.py:86, in get_1kg(output_dir, overwrite)
84 cluster_readable_vcf = _copy_to_tmp(fs, local_path_uri(tmp_vcf), extension='vcf.bgz')
85 info('importing VCF and writing to matrix table...')
---> 86 hl.import_vcf(cluster_readable_vcf, min_partitions=16).write(matrix_table_path, overwrite=True)
88 tmp_sample_annot = os.path.join(tmp_dir, '1kg_annotations.txt')
89 source = resources['1kg_annotations']
File <decorator-gen-1484>:2, in import_vcf(path, force, force_bgz, header_file, min_partitions, drop_samples, call_fields, reference_genome, contig_recoding, array_elements_required, skip_invalid_loci, entry_float_type, filter, find_replace, n_partitions, block_size, _create_row_uids, _create_col_uids)
File /opt/conda/lib/python3.10/site-packages/hail/typecheck/check.py:584, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
581 @decorator
582 def wrapper(__original_func, *args, **kwargs):
583 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 584 return __original_func(*args_, **kwargs_)
File /opt/conda/lib/python3.10/site-packages/hail/methods/impex.py:2822, in import_vcf(path, force, force_bgz, header_file, min_partitions, drop_samples, call_fields, reference_genome, contig_recoding, array_elements_required, skip_invalid_loci, entry_float_type, filter, find_replace, n_partitions, block_size, _create_row_uids, _create_col_uids)
2812 hl.utils.warning(
2813 f'You are trying to read {path} with *ONE* core of parallelism. This '
2814 'will be very slow. If this file is block-gzipped (bgzip-ed), use '
2815 'force_bgz=True instead.'
2816 )
2818 reader = ir.MatrixVCFReader(path, call_fields, entry_float_type, header_file,
2819 n_partitions, block_size, min_partitions,
2820 reference_genome, contig_recoding, array_elements_required,
2821 skip_invalid_loci, force_bgz, force, filter, find_replace)
-> 2822 return MatrixTable(ir.MatrixRead(reader, drop_cols=drop_samples, drop_row_uids=not _create_row_uids, drop_col_uids=not _create_col_uids))
File /opt/conda/lib/python3.10/site-packages/hail/matrixtable.py:709, in MatrixTable.__init__(self, mir)
706 self._col_indices = Indices(self, {self._col_axis})
707 self._entry_indices = Indices(self, {self._row_axis, self._col_axis})
--> 709 self._type = self._mir.typ
711 self._global_type = self._type.global_type
712 self._col_type = self._type.col_type
File /opt/conda/lib/python3.10/site-packages/hail/ir/base_ir.py:494, in MatrixIR.typ(self)
491 @property
492 def typ(self):
493 if self._type is None:
--> 494 self.compute_type(deep_typecheck=False)
495 return self._type
File /opt/conda/lib/python3.10/site-packages/hail/ir/base_ir.py:485, in MatrixIR.compute_type(self, deep_typecheck)
483 def compute_type(self, deep_typecheck):
484 if deep_typecheck or self._type is None:
--> 485 computed = self._compute_type(deep_typecheck)
486 if self._type is not None:
487 assert self._type == computed
File /opt/conda/lib/python3.10/site-packages/hail/ir/matrix_ir.py:185, in MatrixRead._compute_type(self, deep_typecheck)
183 def _compute_type(self, deep_typecheck):
184 if self._type is None:
--> 185 return Env.backend().matrix_type(self)
186 else:
187 return self._type
File /opt/conda/lib/python3.10/site-packages/hail/backend/py4j_backend.py:184, in Py4JBackend.matrix_type(self, mir)
183 def matrix_type(self, mir):
--> 184 jir = self._to_java_matrix_ir(mir)
185 return tmatrix._from_java(jir.typ())
File /opt/conda/lib/python3.10/site-packages/hail/backend/py4j_backend.py:170, in Py4JBackend._to_java_matrix_ir(self, ir)
169 def _to_java_matrix_ir(self, ir):
--> 170 return self._to_java_ir(ir, self._parse_matrix_ir)
File /opt/conda/lib/python3.10/site-packages/hail/backend/py4j_backend.py:145, in Py4JBackend._to_java_ir(self, ir, parse)
143 r = CSERenderer(stop_at_jir=True)
144 # FIXME parse should be static
--> 145 ir._jir = parse(r(finalize_randomness(ir)), ir_map=r.jirs)
146 return ir._jir
File /opt/conda/lib/python3.10/site-packages/hail/backend/py4j_backend.py:158, in Py4JBackend._parse_matrix_ir(self, code, ir_map)
157 def _parse_matrix_ir(self, code, ir_map={}):
--> 158 return self._jbackend.parse_matrix_ir(code, ir_map)
File /opt/conda/lib/python3.10/site-packages/py4j/java_gateway.py:1321, in JavaMember.__call__(self, *args)
1314 args_command, temp_args = self._build_args(*args)
1316 command = proto.CALL_COMMAND_NAME +\
1317 self.command_header +\
1318 args_command +\
1319 proto.END_COMMAND_PART
-> 1321 answer = self.gateway_client.send_command(command)
1322 return_value = get_return_value(
1323 answer, self.gateway_client, self.target_id, self.name)
1325 for temp_arg in temp_args:
File /opt/conda/lib/python3.10/site-packages/py4j/java_gateway.py:1055, in GatewayClient.send_command(self, command, retry, binary)
1053 response = self.send_command(command, binary=binary)
1054 else:
-> 1055 logging.exception(
1056 "Exception while sending command.")
1057 response = proto.ERROR
1058 except KeyboardInterrupt:
1059 # For KeyboardInterrupt triggered from Python shell, it should
1060 # clean up the connection so the connection is
(...)
1064 # See also https://github.com/bartdag/py4j/pull/440 for
1065 # more details.
File /opt/conda/lib/python3.10/logging/__init__.py:2113, in exception(msg, exc_info, *args, **kwargs)
2107 def exception(msg, *args, exc_info=True, **kwargs):
2108 """
2109 Log a message with severity 'ERROR' on the root logger, with exception
2110 information. If the logger has no handlers, basicConfig() is called to add
2111 a console handler with a pre-defined format.
2112 """
-> 2113 error(msg, *args, exc_info=exc_info, **kwargs)
File /opt/conda/lib/python3.10/logging/__init__.py:2105, in error(msg, *args, **kwargs)
2103 if len(root.handlers) == 0:
2104 basicConfig()
-> 2105 root.error(msg, *args, **kwargs)
File /opt/conda/lib/python3.10/logging/__init__.py:1506, in Logger.error(self, msg, *args, **kwargs)
1497 """
1498 Log 'msg % args' with severity 'ERROR'.
1499
(...)
1503 logger.error("Houston, we have a %s", "major problem", exc_info=1)
1504 """
1505 if self.isEnabledFor(ERROR):
-> 1506 self._log(ERROR, msg, args, **kwargs)
TypeError: Log._log() got an unexpected keyword argument 'exc_info'