Hi!
I’m trying to convert a subset of a Hail table to a pandas DataFrame (I’m using ‘select’ to pick the relevant columns), but I’m getting an error. I think the cause is the size of the Hail table, because the same operations work on a smaller table. How can I fix this and make it work?
This is the code:
filtered_pd = filtered_ht.select(
AC=filtered_ht.info.AC,
AN=filtered_ht.info.AN,
AF=filtered_ht.info.AF,
variant_type=filtered_ht.info.variant_type,
vep=filtered_ht.info.vep).to_pandas()
This is the error:
Py4JError Traceback (most recent call last)
<timed exec> in <module>
<decorator-gen-1182> in to_pandas(self, flatten)
/cs/labs/michall/ofer.feinstein/my_env/lib/python3.7/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
575 def wrapper(__original_func, *args, **kwargs):
576 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 577 return __original_func(*args_, **kwargs_)
578
579 return wrapper
/cs/labs/michall/ofer.feinstein/my_env/lib/python3.7/site-packages/hail/table.py in to_pandas(self, flatten)
3337 dtypes_struct = table.row.dtype
3338 collect_dict = {key: hl.agg.collect(value) for key, value in table.row.items()}
-> 3339 column_struct_array = table.aggregate(hl.struct(**collect_dict))
3340 columns = list(column_struct_array.keys())
3341 data_dict = {}
<decorator-gen-1128> in aggregate(self, expr, _localize)
/cs/labs/michall/ofer.feinstein/my_env/lib/python3.7/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
575 def wrapper(__original_func, *args, **kwargs):
576 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 577 return __original_func(*args_, **kwargs_)
578
579 return wrapper
/cs/labs/michall/ofer.feinstein/my_env/lib/python3.7/site-packages/hail/table.py in aggregate(self, expr, _localize)
1229
1230 if _localize:
-> 1231 return Env.backend().execute(hl.ir.MakeTuple([agg_ir]))[0]
1232
1233 return construct_expr(ir.LiftMeOut(agg_ir), expr.dtype)
/cs/labs/michall/ofer.feinstein/my_env/lib/python3.7/site-packages/hail/backend/py4j_backend.py in execute(self, ir, timed)
96 # print(self._hail_package.expr.ir.Pretty.apply(jir, True, -1))
97 try:
---> 98 result_tuple = self._jbackend.executeEncode(jir, stream_codec)
99 (result, timings) = (result_tuple._1(), result_tuple._2())
100 value = ir.typ._from_encoding(result)
/cs/labs/michall/ofer.feinstein/my_env/lib/python3.7/site-packages/py4j/java_gateway.py in __call__(self, *args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer, self.gateway_client, self.target_id, self.name)
1306
1307 for temp_arg in temp_args:
/cs/labs/michall/ofer.feinstein/my_env/lib/python3.7/site-packages/hail/backend/py4j_backend.py in deco(*args, **kwargs)
19 import pyspark
20 try:
---> 21 return f(*args, **kwargs)
22 except py4j.protocol.Py4JJavaError as e:
23 s = e.java_exception.toString()
/cs/labs/michall/ofer.feinstein/my_env/lib/python3.7/site-packages/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
334 raise Py4JError(
335 "An error occurred while calling {0}{1}{2}".
--> 336 format(target_id, ".", name))
337 else:
338 type = answer[1]
Py4JError: An error occurred while calling o1.executeEncode
Thanks in advance,
Ofer