Hello,
Let's say I have a small MatrixTable (~1000 variants) that has multiallelic variants that have been split. I only want certain variants. For the example shown below, I would only want the variant that is A->G. I have a dataframe which contains the chromosome, position, ref and alt bases that I want. I know I need to use filter_rows, but I’m struggling with building the filter expression.
chr1 1000 A G
chr1 1000 A T
# `variants` is a pandas DataFrame whose `variant` column holds strings formatted as chr1:1000:A:G
ht = hl.Table.from_pandas(variants)
ht = ht.key_by(**hl.parse_variant(ht.variant))
#Attempt 1
mt_filtered = mt.filter_rows(ht[mt.locus] & ht[mt.allele])
---------------------------------------------------------------------------
TableIndexKeyError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/hail/table.py in index(self, all_matches, *exprs)
1777 try:
-> 1778 return self._index(*exprs, all_matches=all_matches)
1779 except TableIndexKeyError as err:
/opt/conda/lib/python3.7/site-packages/hail/table.py in _index(self, all_matches, *exprs)
1851 if not is_interval:
-> 1852 raise TableIndexKeyError(self.key.dtype, exprs)
1853
TableIndexKeyError:
During handling of the above exception, another exception occurred:
ExpressionException Traceback (most recent call last)
/tmp/ipykernel_238/3591338782.py in <module>
----> 1 mt_pgx_only = mt_combined_cleaned.filter_rows(ht[mt_combined_cleaned.locus] & ht[mt_combined_cleaned.allele])
/opt/conda/lib/python3.7/site-packages/hail/table.py in __getitem__(self, item)
374
375 try:
--> 376 return self.index(*wrap_to_tuple(item))
377 except TypeError as e:
378 raise TypeError("Table.__getitem__: invalid index argument(s)\n"
/opt/conda/lib/python3.7/site-packages/hail/table.py in index(self, all_matches, *exprs)
1778 return self._index(*exprs, all_matches=all_matches)
1779 except TableIndexKeyError as err:
-> 1780 raise ExpressionException(f"Key type mismatch: cannot index table with given expressions:\n"
1781 f" Table key: {', '.join(str(t) for t in err.key_type.values()) or '<<<empty key>>>'}\n"
1782 f" Index Expressions: {', '.join(str(e.dtype) for e in err.index_expressions)}")
ExpressionException: Key type mismatch: cannot index table with given expressions:
Table key: locus<GRCh38>, array<str>
Index Expressions: locus<GRCh38>
#Attempt 2
mt_filtered = mt.filter_rows(ht.locus[mt.locus] & ht.alleles[mt.allele])
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/tmp/ipykernel_238/483111301.py in <module>
----> 1 mt_pgx_only = mt_combined_cleaned.filter_rows(ht.locus[mt_combined_cleaned.locus] & ht.alleles[mt_combined_cleaned.allele])
TypeError: 'LocusExpression' object is not subscriptable