Hi Dan.
You’re right. I was using split_multi_hts. I think I just copied something wrong. Sorry about not including the full stack trace. This was originally part of an email to our group to try to debug amongst ourselves.
A full stack trace example is below, for the same 20 kb region and 245k samples. I don’t see any Java traceback on my end. The Hail log file is also attached. The environment is 2 regular workers and 300 preemptible workers with 4 CPUs and 15 GB of RAM each.
def split_multi_allelic(mt):
    """Split only the multi-allelic rows of ``mt`` and recombine with the rest.

    Rows with exactly two alleles bypass ``hl.split_multi_hts`` and are
    annotated with ``a_index=1`` and ``was_split=False`` instead
    (presumably so their row fields line up with those of the split rows
    for the union -- confirm against the Hail docs). Rows with more than
    two alleles are passed through ``hl.split_multi_hts``. Rows with fewer
    than two alleles match neither filter and are absent from the result.

    Args:
        mt: Matrix table with an ``alleles`` row field.

    Returns:
        The split multi-allelic rows unioned with the annotated
        biallelic rows.
    """
    allele_count = hl.len(mt.alleles)

    # Only rows that actually have more than two alleles go through the split.
    split_rows = hl.split_multi_hts(mt.filter_rows(allele_count > 2))

    # Biallelic rows are kept as they are, tagged as "not split".
    biallelic_rows = mt.filter_rows(allele_count == 2).annotate_rows(
        a_index=1,
        was_split=False,
    )

    return split_rows.union_rows(biallelic_rows)
# Load the input matrix table; mt_two is defined outside this snippet
# (presumably the path of the previous pipeline stage -- confirm).
mt = hl.read_matrix_table(mt_two)
# Split multi-allelic rows with the helper defined above.
mt = split_multi_allelic(mt)
# Write the result to mt_three, replacing any existing output. This is the
# call shown failing in the stack trace below.
mt.write(mt_three, overwrite=True)
ERROR:root:Exception while sending command. (17 + 292) / 84648]
Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/py4j/java_gateway.py", line 1224, in send_command
raise Py4JNetworkError("Answer from Java side is empty")
py4j.protocol.Py4JNetworkError: Answer from Java side is empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/py4j/java_gateway.py", line 1038, in send_command
response = connection.send_command(command)
File "/opt/conda/lib/python3.7/site-packages/py4j/java_gateway.py", line 1229, in send_command
"Error while receiving", e, proto.ERROR_ON_RECEIVE)
py4j.protocol.Py4JNetworkError: Error while receiving
---------------------------------------------------------------------------
Py4JError Traceback (most recent call last)
/tmp/ipykernel_240/4270762384.py in <module>
----> 1 mt.write(mt_three, overwrite=True)
<decorator-gen-1336> in write(self, output, overwrite, stage_locally, _codec_spec, _partitions, _checkpoint_file)
/opt/conda/lib/python3.7/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
575 def wrapper(__original_func, *args, **kwargs):
576 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 577 return __original_func(*args_, **kwargs_)
578
579 return wrapper
/opt/conda/lib/python3.7/site-packages/hail/matrixtable.py in write(self, output, overwrite, stage_locally, _codec_spec, _partitions, _checkpoint_file)
2582
2583 writer = ir.MatrixNativeWriter(output, overwrite, stage_locally, _codec_spec, _partitions, _partitions_type, _checkpoint_file)
-> 2584 Env.backend().execute(ir.MatrixWrite(self._mir, writer))
2585
2586 class _Show:
/opt/conda/lib/python3.7/site-packages/hail/backend/py4j_backend.py in execute(self, ir, timed)
97 # print(self._hail_package.expr.ir.Pretty.apply(jir, True, -1))
98 try:
---> 99 result_tuple = self._jbackend.executeEncode(jir, stream_codec, timed)
100 (result, timings) = (result_tuple._1(), result_tuple._2())
101 value = ir.typ._from_encoding(result)
/opt/conda/lib/python3.7/site-packages/py4j/java_gateway.py in __call__(self, *args)
1321 answer = self.gateway_client.send_command(command)
1322 return_value = get_return_value(
-> 1323 answer, self.gateway_client, self.target_id, self.name)
1324
1325 for temp_arg in temp_args:
/opt/conda/lib/python3.7/site-packages/hail/backend/py4j_backend.py in deco(*args, **kwargs)
19 import pyspark
20 try:
---> 21 return f(*args, **kwargs)
22 except py4j.protocol.Py4JJavaError as e:
23 s = e.java_exception.toString()
/opt/conda/lib/python3.7/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
334 raise Py4JError(
335 "An error occurred while calling {0}{1}{2}".
--> 336 format(target_id, ".", name))
337 else:
338 type = answer[1]
Py4JError: An error occurred while calling o1.executeEncode
hail log: hail-20230802-1848-0.2.107-2387bb00ceee · GitHub