----------------------------------------
Global fields:
None
----------------------------------------
Column fields:
's': str
'pheno': struct {
`Project::Sample`: str,
Project: str,
`Family ID`: str,
Father_ID: str,
Mother_ID: str,
Gender: str,
Phenotype: str
}
----------------------------------------
Row fields:
'locus': locus<GRCh38>
'alleles': array<str>
'rsid': str
'qual': float64
'filters': set<str>
'info': struct {
AC: array<int32>,
AC_adj: array<int32>,
AF: array<float64>,
AN: int32,
AS_AltDP: array<int32>,
AS_BaseQRankSum: array<float64>,
AS_FS: array<float64>,
AS_InbreedingCoeff: array<float64>,
AS_MQ: array<float64>,
AS_MQRankSum: array<float64>,
AS_QD: array<float64>,
AS_QUALapprox: str,
AS_RAW_BaseQRankSum: str,
AS_RAW_MQ: str,
AS_RAW_MQRankSum: str,
AS_RAW_ReadPosRankSum: str,
AS_ReadPosRankSum: array<float64>,
AS_SB_TABLE: str,
AS_SOR: array<float64>,
AS_VarDP: str,
BaseQRankSum: float64,
DB: bool,
DP: int32,
DS: bool,
END: int32,
ExcessHet: float64,
FS: float64,
InbreedingCoeff: float64,
MQ: float64,
MQRankSum: float64,
MQ_DP: int32,
QD: float64,
QUALapprox: int32,
RAW_GT_COUNT: array<int32>,
RAW_MQandDP: array<int32>,
ReadPosRankSum: float64,
SB_TABLE: array<int32>,
SOR: float64,
VarDP: int32
}
'a_index': int32
'was_split': bool
'nfe_AF_e': float64
'fin_AF_e': float64
'oth_AF_e': float64
'asj_AF_e': float64
'afr_AF_e': float64
'eas_AF_e': float64
'sas_AF_e': float64
'amr_AF_e': float64
'raw_AF_e': float64
'variant_qc': struct {
dp_stats: struct {
mean: float64,
stdev: float64,
min: float64,
max: float64
},
gq_stats: struct {
mean: float64,
stdev: float64,
min: float64,
max: float64
},
AC: array<int32>,
AF: array<float64>,
AN: int32,
homozygote_count: array<int32>,
call_rate: float64,
n_called: int64,
n_not_called: int64,
n_filtered: int64,
n_het: int64,
n_non_ref: int64,
het_freq_hwe: float64,
p_value_hwe: float64
}
'old_locus': locus<GRCh38>
'old_alleles': array<str>
'old_to_new': array<int32>
'new_to_old': array<int32>
'vep': struct {
assembly_name: str,
allele_string: str,
ancestral: str,
colocated_variants: array<struct {
aa_allele: str,
aa_maf: float64,
afr_allele: str,
afr_maf: float64,
allele_string: str,
amr_allele: str,
amr_maf: float64,
clin_sig: array<str>,
end: int32,
eas_allele: str,
eas_maf: float64,
ea_allele: str,
ea_maf: float64,
eur_allele: str,
eur_maf: float64,
exac_adj_allele: str,
exac_adj_maf: float64,
exac_allele: str,
exac_afr_allele: str,
exac_afr_maf: float64,
exac_amr_allele: str,
exac_amr_maf: float64,
exac_eas_allele: str,
exac_eas_maf: float64,
exac_fin_allele: str,
exac_fin_maf: float64,
exac_maf: float64,
exac_nfe_allele: str,
exac_nfe_maf: float64,
exac_oth_allele: str,
exac_oth_maf: float64,
exac_sas_allele: str,
exac_sas_maf: float64,
id: str,
minor_allele: str,
minor_allele_freq: float64,
phenotype_or_disease: int32,
pubmed: array<int32>,
sas_allele: str,
sas_maf: float64,
somatic: int32,
start: int32,
strand: int32
}>,
context: str,
end: int32,
id: str,
input: str,
intergenic_consequences: array<struct {
allele_num: int32,
consequence_terms: array<str>,
impact: str,
minimised: int32,
variant_allele: str
}>,
most_severe_consequence: str,
motif_feature_consequences: array<struct {
allele_num: int32,
consequence_terms: array<str>,
high_inf_pos: str,
impact: str,
minimised: int32,
motif_feature_id: str,
motif_name: str,
motif_pos: int32,
motif_score_change: float64,
strand: int32,
variant_allele: str
}>,
regulatory_feature_consequences: array<struct {
allele_num: int32,
biotype: str,
consequence_terms: array<str>,
impact: str,
minimised: int32,
regulatory_feature_id: str,
variant_allele: str
}>,
seq_region_name: str,
start: int32,
strand: int32,
transcript_consequences: array<struct {
allele_num: int32,
amino_acids: str,
appris: str,
biotype: str,
canonical: int32,
ccds: str,
cdna_start: int32,
cdna_end: int32,
cds_end: int32,
cds_start: int32,
codons: str,
consequence_terms: array<str>,
distance: int32,
domains: array<struct {
db: str,
name: str
}>,
exon: str,
gene_id: str,
gene_pheno: int32,
gene_symbol: str,
gene_symbol_source: str,
hgnc_id: str,
hgvsc: str,
hgvsp: str,
hgvs_offset: int32,
impact: str,
intron: str,
lof: str,
lof_flags: str,
lof_filter: str,
lof_info: str,
minimised: int32,
polyphen_prediction: str,
polyphen_score: float64,
protein_end: int32,
protein_start: int32,
protein_id: str,
sift_prediction: str,
sift_score: float64,
strand: int32,
swissprot: str,
transcript_id: str,
trembl: str,
tsl: int32,
uniparc: str,
variant_allele: str
}>,
variant_class: str
}
----------------------------------------
Entry fields:
'AD': array<int32>
'DP': int32
'GQ': int32
'GT': call
'MIN_DP': int32
'PGT': call
'PID': str
'PL': array<int32>
'SB': array<int32>
'VAF': float32
'binom_pval': float64
----------------------------------------
Column key: ['s']
Row key: ['locus', 'alleles']
----------------------------------------
I would like to calculate the number of variants by the category on the phenotype column.
I would also like to count the number of variants per individual to be able to plot it in a histogram.