1. choose_cols
@tpoterba’s answer does not work for me. Here is my code:
arr_1 = np.random.uniform(0, 1, 98622)
bool_arr = arr_1 < 0.001
idx_arr = np.array(range(98622))
idx_arr_ss = idx_arr[bool_arr]
idx_arr_ss = list(idx_arr_ss)
mt_filt = mt_wgs.choose_cols(idx_arr_ss)
This gives me the error “expected Sequence[int], found list…”. The full error is pasted below:
---------------------------------------------------------------------------
TypecheckFailure Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/hail/typecheck/check.py in arg_check(arg, function_name, arg_name, checker)
583 try:
--> 584 return checker.check(arg, function_name, arg_name)
585 except TypecheckFailure as e:
/opt/conda/lib/python3.7/site-packages/hail/typecheck/check.py in check(self, x, caller, param)
102 for elt in x:
--> 103 elt_ = tc.check(elt, caller, param)
104 x_.append(elt_)
/opt/conda/lib/python3.7/site-packages/hail/typecheck/check.py in check(self, x, caller, param)
264 return x
--> 265 raise TypecheckFailure
266
TypecheckFailure:
The above exception was the direct cause of the following exception:
TypeError Traceback (most recent call last)
/tmp/ipykernel_111/3002698298.py in <module>
4 idx_arr_ss = idx_arr[bool_arr]
5 idx_arr_ss = list(idx_arr_ss)
----> 6 mt_filt = mt_wgs.choose_cols(idx_arr_ss)
<decorator-gen-1292> in choose_cols(self, indices)
/opt/conda/lib/python3.7/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
574 @decorator
575 def wrapper(__original_func, *args, **kwargs):
--> 576 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
577 return __original_func(*args_, **kwargs_)
578
/opt/conda/lib/python3.7/site-packages/hail/typecheck/check.py in check_all(f, args, kwargs, checks, is_method)
541 raise TypeError(
542 f'Expected {n_pos_args} positional arguments, found {len(args)}')
--> 543 args_.append(arg_check(args[i], name, arg_name, checker))
544 elif param.kind in (param.KEYWORD_ONLY, param.POSITIONAL_OR_KEYWORD):
545 arg = kwargs.pop(arg_name, param.default)
/opt/conda/lib/python3.7/site-packages/hail/typecheck/check.py in arg_check(arg, function_name, arg_name, checker)
590 expected=checker.expects(),
591 found=checker.format(arg)
--> 592 )) from e
593
594
TypeError: choose_cols: parameter 'indices': expected Sequence[int], found list: [548, 559, 696, 3116, 3337, 4160, 5048, 6571, 7964, 8994, 10192, 11747, 12001, 12442, 12463, 12495, 13046, 13434, 16145, 16431, 16703, 17854, 17880, 19351, 19405, 19882, 20963, 21081, 21121, 22384, 24354, 24401, 24606, 24835, 28074, 28487, 28953, 29204, 31414, 32207, 32994, 33138, 33341, 35361, 36547, 37915, 38708, 38748, 40381, 40426, 42061, 42834, 43292, 43806, 45601, 45621, 47220, 48844, 52364, 54300, 54679, 55036, 55222, 56614, 57399, 58965, 61907, 63805, 66758, 67560, 67783, 68499, 69318, 72045, 72968, 73859, 75573, 75737, 76930, 77283, 78666, 79195, 80237, 81115, 81498, 84848, 85988, 87187, 88207, 90315, 90674, 91849, 91851, 92237, 93268, 93577, 93960, 94219, 96175, 96497, 97699]
2. sample_cols
@danking’s first answer, to use sample_cols, also does not work for me, though I am not sure why. I create the filtered matrix table with:
mt_filt = mt_wgs.sample_cols(0.001)
I can successfully use mt_filt.describe() to look at the fields, but when I try mt_filt.count() I get the following error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/tmp/ipykernel_111/3205129198.py in <module>
----> 1 mt_filt.count()
/opt/conda/lib/python3.7/site-packages/hail/matrixtable.py in count(self)
2440 Number of rows, number of cols.
2441 """
-> 2442 count_ir = ir.MatrixCount(self._mir)
2443 return Env.backend().execute(count_ir)
2444
<decorator-gen-522> in __init__(self, child)
/opt/conda/lib/python3.7/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
575 def wrapper(__original_func, *args, **kwargs):
576 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 577 return __original_func(*args_, **kwargs_)
578
579 return wrapper
/opt/conda/lib/python3.7/site-packages/hail/ir/ir.py in __init__(self, child)
2807 @typecheck_method(child=MatrixIR)
2808 def __init__(self, child):
-> 2809 child = child.handle_randomness(None, None)
2810 super().__init__(child)
2811 self.child = child
/opt/conda/lib/python3.7/site-packages/hail/ir/base_ir.py in handle_randomness(self, row_uid_field_name, col_uid_field_name)
450 if row_uid_field_name is None and col_uid_field_name and not self.uses_randomness:
451 return self
--> 452 result = self._handle_randomness(row_uid_field_name, col_uid_field_name)
453 assert result is not None
454 assert row_uid_field_name is None or row_uid_field_name in result.typ.row_type
/opt/conda/lib/python3.7/site-packages/hail/ir/matrix_ir.py in _handle_randomness(self, row_uid_field_name, col_uid_field_name)
555 pred = ir.Let('sa', old_col, self.pred)
556 if self.pred.uses_randomness:
--> 557 pred = ir.Let('__rng_state', ir.RNGSplit(ir.RNGStateLiteral(rng_key), col_uid))
558 result = MatrixFilterCols(child, pred)
559 if drop_col_uid:
TypeError: __init__() missing 1 required positional argument: 'body'
3. Coin flip with Hail
@danking’s suggestion to do the coin flipping directly in Hail appears to solve my problem. Thank you both for your help!