Hi,
I have a MatrixTable in Hail with the following schema:
----------------------------------------
Global fields:
'metadata': struct {
filter: dict<str, dict<str, str>>,
info: dict<str, dict<str, str>>,
format: dict<str, dict<str, str>>
}
----------------------------------------
Column fields:
's': str
----------------------------------------
Row fields:
'locus': locus<GRCh38>
'alleles': array<str>
'rsid': str
'qual': float64
'filters': set<str>
'info': struct {
AC: array<int32>,
AF: array<float64>,
AN: int32,
AS_BaseQRankSum: array<float64>,
AS_FS: array<float64>,
AS_FilterStatus: array<str>,
AS_InbreedingCoeff: array<float64>,
AS_MQ: array<float64>,
AS_MQRankSum: array<float64>,
AS_QD: array<float64>,
AS_QUALapprox: str,
AS_RAW_BaseQRankSum: str,
AS_RAW_MQ: str,
AS_RAW_MQRankSum: str,
AS_RAW_ReadPosRankSum: str,
AS_ReadPosRankSum: array<float64>,
AS_SB_TABLE: str,
AS_SOR: array<float64>,
AS_VQSLOD: array<str>,
AS_VarDP: str,
AS_culprit: array<str>,
BaseQRankSum: float64,
DB: bool,
DP: int32,
END: int32,
ExcessHet: float64,
FS: float64,
InbreedingCoeff: float64,
MLEAC: array<int32>,
MLEAF: array<float64>,
MQ: float64,
MQRankSum: float64,
MQ_DP: int32,
NEGATIVE_TRAIN_SITE: bool,
POSITIVE_TRAIN_SITE: bool,
QD: float64,
QUALapprox: int32,
RAW_GT_COUNT: array<int32>,
RAW_MQandDP: array<int32>,
ReadPosRankSum: float64,
SOR: float64,
VQSLOD: float64,
VarDP: int32,
culprit: str,
CSQ: array<str>
}
'transcript_consequences': struct {
allele: str,
consequence: str,
impact: str,
gene_symbol: str,
gene_id: str,
feature_type: str,
feature: str,
biotype: str,
exon: str,
intron: str,
hgvsc: str,
hgvsp: str,
cdNA_position: str,
cds_position: str,
protein_position: str,
amino_acids: str,
codons: str,
existin_variation: str,
allele_num: str,
distatnce: str,
strand: str,
flags: str,
variant_class: str,
minimised: str,
gene_symbol_source: str,
hgnc_id: str,
canonical: str,
mane_select: str,
mane_plus_clinical: str,
tsl: str,
appris: str,
ccds: str,
protein_id: str,
swissprot: str,
trembl: str,
uniparc: str,
uniprot_isoform: str,
gene_pheno: str,
sift_prediction: str,
polyphen_prediction: str,
domains: str,
mirna: str,
hgvs_offset: str,
frequencies: array<str>,
other: array<str>,
lof: str,
lof_filter: str,
lof_flags: str,
lof_info: str
}
----------------------------------------
Entry fields:
'AD': array<int32>
'DP': int32
'GQ': int32
'GT': call
'MIN_DP': int32
'PGT': call
'PID': str
'PL': array<int32>
'PS': int32
'RGQ': int32
'SB': array<int32>
----------------------------------------
Column key: ['s']
Row key: ['locus', 'alleles']
----------------------------------------
However, when I try to filter on the `transcript_consequences` row field as shown in the traceback below, I get an `AttributeError`:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/tmp/ipykernel_614/2944841345.py in <module>
----> 1 mt.transcript_consequences.filter(
2 lambda tc: (tc.canonical == 1) & (tc.biotype == "protein_coding")
3 )
/opt/conda/lib/python3.7/site-packages/hail/expr/expressions/typed_expressions.py in __getattr__(self, item)
1771 return self.__dict__[item]
1772 else:
-> 1773 raise AttributeError(get_nice_attr_error(self, item))
1774
1775 def __len__(self):
AttributeError: StructExpression instance has no field, method, or property 'filter'
Did you mean:
Data field: 'lof_filter'
StructExpression method: 'flatten'
What am I doing wrong?