I get the wrong summary statistic file. The following is my code. I don’t know if there is any mistakes in my pipeline.
contig = "autosomes"
pipeline = 0
if contig == 'autosomes':
contig_expr = 'c{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22}'
else:
contig_expr = contig
if contig not in set(['autosomes', 'chrX', 'chrXY']):
raise ValueError(f'Invalid contig argument "{contig}" - must be one of {{"autosomes", "chrX", "chrXY"}}.')
ht_phenotypes = hl.read_table(project_path + f'data/processed/biomarker/biomarkers_gwas.pipeline_{pipeline}.ht')
ht_covariates = hl.read_table(project_path + f"data/processed/biomarker/gwas_covariates.ht")
ht_variants = hl.import_table(project_path + f'data/raw/gene/variants.tsv', impute=True, missing="")
ht_variants = ht_variants.annotate(variant = hl.parse_variant(ht_variants.variant))
mt = hl.import_bgen(
path=project_path+ f'data/raw/gene/bgen/ukb_{contig_expr}_b0_v3.bgen',
sample_file=project_path+ f'data/raw/gene/sample/{contig}.sample',
entry_fields=['dosage'],
variants=ht_variants.variant)
mt = mt.annotate_cols(
phenotypes=ht_phenotypes[mt.s],
covariates=ht_covariates[mt.s])
phenotypes = list(mt['phenotypes'].keys())
print(phenotypes)
ht = hl.linear_regression_rows(
y=[[mt['phenotypes'][y]] for y in phenotypes],
x=mt.dosage,
covariates=[1, *[mt['covariates'][x] for x in list(mt['covariates'].keys())]],
pass_through=['varid', 'rsid'])
ht = ht.annotate_globals(phenotypes=phenotypes)
ht.write(project_path + f"data/processed/gwas/biomarker_gwas_results.{contig}.pipeline_{pipeline}.ht",
overwrite=True)
Thanks,
Meng