Hey hail team, I’m running into a weird bug. I’m running (version 0.2.28-61941242c15d
):
data_source = 'broad'
freeze = 5
hardcalls = get_ukbb_data(data_source, freeze, raw=False, split=True, adj=False)
sample_map_ht = hl.read_table(array_sample_map_ht(data_source, freeze))
sample_map = hl.import_table(array_sample_map(freeze), delimiter=',', quote='"')
sample_map = sample_map.key_by(s=sample_map.eid_26041)
print(hardcalls.count())
chr20 = hl.filter_intervals(hardcalls, [hl.parse_locus_interval('chr20', reference_genome='GRCh38')])
print(chr20.count())
chr20 = chr20.select_rows('a_index', 'was_split')
chr20 = chr20.annotate_cols(**sample_map_ht[chr20.s])
chr20 = chr20.annotate_cols(**sample_map[chr20.ukbb_app_26041_id])
chr20 = chr20.select_cols('batch', 'batch.c')
chr20 = chr20.transmute_cols(batch_num=chr20['batch'],
batch=chr20['batch.c'])
chr20.describe()
chr20 = chr20.annotate_rows(
n_not_called_50K=chr20.aggregate_cols(
hl.agg.filter(chr20.batch == '150K',
hl.agg.count_where(hl.is_missing(chr20.GT)))),
n_not_called_100K=chr20.aggregate_cols(
hl.agg.filter(chr20.batch == '100K',
hl.agg.count_where(hl.is_missing(chr20.GT)))),
n_not_called_200K=chr20.aggregate_cols(
hl.agg.filter(chr20.batch == '200K',
hl.agg.count_where(hl.is_missing(chr20.GT))))
)
and getting this error:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-5-10337294da16> in <module>
2 n_not_called_50K=chr20.aggregate_cols(
3 hl.agg.filter(chr20.batch == '150K',
----> 4 hl.agg.count_where(hl.is_missing(chr20.GT)))),
5 n_not_called_100K=chr20.aggregate_cols(
6 hl.agg.filter(chr20.batch == '100K',
</opt/conda/default/lib/python3.6/site-packages/decorator.py:decorator-gen-1157> in aggregate_cols(self, expr, _localize)
/opt/conda/default/lib/python3.6/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
583 def wrapper(__original_func, *args, **kwargs):
584 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585 return __original_func(*args_, **kwargs_)
586
587 return wrapper
/opt/conda/default/lib/python3.6/site-packages/hail/matrixtable.py in aggregate_cols(self, expr, _localize)
2037 agg_ir = TableAggregate(MatrixColsTable(base._mir), subst_query)
2038 if _localize:
-> 2039 return Env.backend().execute(agg_ir)
2040 else:
2041 return construct_expr(agg_ir, expr.dtype)
/opt/conda/default/lib/python3.6/site-packages/hail/backend/backend.py in execute(self, ir, timed)
107
108 def execute(self, ir, timed=False):
--> 109 result = json.loads(Env.hc()._jhc.backend().executeJSON(self._to_java_ir(ir)))
110 value = ir.typ._from_json(result['value'])
111 timings = result['timings']
/opt/conda/default/lib/python3.6/site-packages/hail/backend/backend.py in _to_java_ir(self, ir)
103 r = CSERenderer(stop_at_jir=True)
104 # FIXME parse should be static
--> 105 ir._jir = ir.parse(r(ir), ir_map=r.jirs)
106 return ir._jir
107
/opt/conda/default/lib/python3.6/site-packages/hail/ir/renderer.py in __call__(self, root)
181
182 def __call__(self, root: 'ir.BaseIR') -> str:
--> 183 binding_sites = CSEAnalysisPass(self)(root)
184 return CSEPrintPass(self)(root, binding_sites)
185
/opt/conda/default/lib/python3.6/site-packages/hail/ir/renderer.py in __call__(self, root)
251
252 if isinstance(child, ir.IR):
--> 253 bind_depth = child_frame.bind_depth()
254 lets = None
255 if bind_depth < len(stack):
/opt/conda/default/lib/python3.6/site-packages/hail/ir/renderer.py in bind_depth(self)
345 bind_depth = max(bind_depth, max(self.context[0][var] for var in self.node.free_vars))
346 if len(self.node.free_agg_vars) > 0:
--> 347 bind_depth = max(bind_depth, max(self.context[1][var] for var in self.node.free_agg_vars))
348 if len(self.node.free_scan_vars) > 0:
349 bind_depth = max(bind_depth, max(self.context[2][var] for var in self.node.free_scan_vars))
/opt/conda/default/lib/python3.6/site-packages/hail/ir/renderer.py in <genexpr>(.0)
345 bind_depth = max(bind_depth, max(self.context[0][var] for var in self.node.free_vars))
346 if len(self.node.free_agg_vars) > 0:
--> 347 bind_depth = max(bind_depth, max(self.context[1][var] for var in self.node.free_agg_vars))
348 if len(self.node.free_scan_vars) > 0:
349 bind_depth = max(bind_depth, max(self.context[2][var] for var in self.node.free_scan_vars))
KeyError: 'g'
I’d appreciate any insight into this error!