Error: ExpressionException: Cannot combine expressions from different source objects

mgarcia · January 8, 2024, 7:16pm

I am trying to run the following component of our Variant QC script to find variants that fall outside of HWE. We are filtering variants by MAF thresholds: 0.01 for all ancestry groups, save for the European group, for which our threshold is 0.001.

def flag_hwe(ds, pval_threshold, n_participants):
    '''
    Perform sex aware HWE tests on autosomes and chrX for hg38
    '''
    populations = ds.aggregate_cols(hl.agg.counter(ds.sa.POP)) # produces a dict of population
    if None in populations:
        print("WARNING: Skipping HWE test for samples of undefined population")
        del populations[None]
    hwe_tests = {}
    # Pre-queue the 3 hwe tests for each population
    for pop in populations:
        #ds_subpop=hl.agg.filter(ds.sa.POP == pop)
        ds_subpop=hl.variant_qc(ds.filter_cols(ds.sa.POP == pop))
        if pop=='EUR':
            maf_thr=0.001
        elif pop=='AFR':
            maf_thr=0.01
        elif pop=='SAS':
            maf_thr=0.01
        elif pop=='EAS':
            maf_thr=0.01
        ds_subpop = ds_subpop.filter_rows(ds_subpop.variant_qc.AF[1] > maf_thr)
        hwe_tests['hwe{}'.format(pop)] = hl.cond(
            # Filter 1) hwe{Pop}
            # If variant is autosomal and there are enough samples in the population, apply HWE
            ds_subpop.locus.in_autosome() & (hl.agg.count_where(ds_subpop.sa.POP == pop) >= n_participants),
            hl.agg.filter(ds_subpop.sa.POP == pop, hl.agg.hardy_weinberg_test(ds_subpop.GT)),
            # If variant is not autosomal, but in X nonpar and there are enough females in the population, apply HWE
            hl.cond(
                ds.locus.in_x_nonpar() & (hl.agg.count_where((ds.sa.POP == pop) & (ds.sa.GENDER == "Female")) >= n_participants),
                hl.agg.filter((ds.sa.POP == pop) & (ds.sa.GENDER == "Female"), hl.agg.hardy_weinberg_test(ds.GT)),
                hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
            )
        )
        hwe_tests['hwe{}FemalePAR'.format(pop)] = hl.cond(
            # Filter 2) hwe{Pop}Female
            # If this is an X par variant and there are enough females in the population, apply HWE
            ds.locus.in_x_par() & (hl.agg.count_where((ds.sa.POP == pop) & (ds.sa.GENDER == "Female")) >= n_participants),
            hl.agg.filter((ds.sa.POP == pop) & (ds.sa.GENDER == "Female"), hl.agg.hardy_weinberg_test(ds.GT)),
            hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
        )
        hwe_tests['hwe{}MalePAR'.format(pop)] = hl.cond(
            # Filter 2) hwe{Pop}Male
            # If this is an X par variant and there are enough males in the population, apply HWE
            ds.locus.in_x_par() & (hl.agg.count_where((ds.sa.POP == pop) & (ds.sa.GENDER == "Male")) >= n_participants),
            hl.agg.filter((ds.sa.POP == pop) & (ds.sa.GENDER == "Male"), hl.agg.hardy_weinberg_test(ds.GT)),
            hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
        )
        hwe_tests['hwe{}FemalenonPAR'.format(pop)] = hl.cond(
            # Filter 3) hwe{Pop}nonPAR
            # If this is an X par variant and there are enough subjects in the population, apply HWE
            ds.locus.in_x_nonpar() & (hl.agg.count_where((ds.sa.POP == pop) & (ds.sa.GENDER == "Female")) >= n_participants),
            hl.agg.filter((ds.sa.POP == pop), hl.agg.hardy_weinberg_test(ds.GT)),
            hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
        )
        hwe_tests['hwe{}MalenonPAR'.format(pop)] = hl.cond(
            # Filter 3) hwe{Pop}nonPAR
            # If this is an X par variant and there are enough subjects in the population, apply HWE
            ds.locus.in_x_nonpar() & (hl.agg.count_where((ds.sa.POP == pop) & (ds.sa.GENDER == "Male")) >= n_participants),
            hl.agg.filter((ds.sa.POP == pop), hl.agg.hardy_weinberg_test(ds.GT)),
            hl.struct(het_freq_hwe=-1, p_value=-1) # Fallback: Not enough samples for this population
        )
    # execute tests
    ds = ds.annotate_rows(
        hweByPop=hl.struct(
            **hwe_tests
        )
    )

    hwe_info = {}
    # Pre-queue the 3 info annotations for each population
    # This exports variant.hwe{}.p_value to an info field variant.info.pHWE{}
    for pop in populations:
        hwe_info['pHWE{}'.format(pop)] = ds.hweByPop['hwe{}'.format(pop)].p_value
        hwe_info['pHWE{}FemalePAR'.format(pop)] = ds.hweByPop['hwe{}FemalePAR'.format(pop)].p_value
        hwe_info['pHWE{}MalePAR'.format(pop)] = ds.hweByPop['hwe{}MalePAR'.format(pop)].p_value
        hwe_info['pHWE{}FemalenonPAR'.format(pop)] = ds.hweByPop['hwe{}FemalenonPAR'.format(pop)].p_value
        hwe_info['pHWE{}MalenonPAR'.format(pop)] = ds.hweByPop['hwe{}MalenonPAR'.format(pop)].p_value
    # execute annotations
    ds = ds.annotate_rows(
        info=ds.info.annotate(
            **hwe_info
        )
    )
    # Add filter flags
    for pop in populations:
        # Now flag variants based on the failed populations
        ds = ds.annotate_rows(
            filters=hl.cond(
                (ds.info['pHWE{}'.format(pop)] != -1) & (ds.info['pHWE{}'.format(pop)] < pval_threshold),
                ds.filters.add('HWE{}'.format(pop)),
                ds.filters
            )
        )
        ds = ds.annotate_rows(
            filters=hl.cond(
                (ds.info['pHWE{}FemalePAR'.format(pop)] != -1) & (ds.info['pHWE{}FemalePAR'.format(pop)] < pval_threshold),
                ds.filters.add('HWE{}FemalePAR'.format(pop)),
                ds.filters
            )
        )
        ds = ds.annotate_rows(
            filters=hl.cond(
                (ds.info['pHWE{}MalePAR'.format(pop)] != -1) & (ds.info['pHWE{}MalePAR'.format(pop)] < pval_threshold),
                ds.filters.add('HWE{}MalePAR'.format(pop)),
                ds.filters
            )
        )
        ds = ds.annotate_rows(
            filters=hl.cond(
                (ds.info['pHWE{}FemalenonPAR'.format(pop)] != -1) & (ds.info['pHWE{}FemalenonPAR'.format(pop)] < pval_threshold),
                ds.filters.add('HWE{}FemalenonPAR'.format(pop)),
                ds.filters
            )
        )
        ds = ds.annotate_rows(
            filters=hl.cond(
                (ds.info['pHWE{}MalenonPAR'.format(pop)] != -1) & (ds.info['pHWE{}MalenonPAR'.format(pop)] < pval_threshold),
                ds.filters.add('HWE{}MalenonPAR'.format(pop)),
                ds.filters
            )
        )
    return ds

I have encountered the following error which I am hoping to better understand. Does this error result from referencing of two different MatrixTables: ds_subpop and ds? Thank you in advance.

Traceback (most recent call last):
  File "hail_variant_qc_v11_run3_YL.py", line 550, in <module>
    vcf = flag_hwe(vcf, args.pval_threshold, args.n_individuals)
  File "hail_variant_qc_v11_run3_YL.py", line 142, in flag_hwe
    hwe_tests['hwe{}'.format(pop)] = hl.cond(
  File "/gpfs/fs1/home/mg1646/.conda/envs/variantqc_hailenv/lib/python3.8/site-packages/deprecated/classic.py", line 285, in wrapper_function
    return wrapped_(*args_, **kwargs_)
  File "<decorator-gen-736>", line 2, in cond
  File "/gpfs/fs1/home/mg1646/.conda/envs/variantqc_hailenv/lib/python3.8/site-packages/hail/typecheck/check.py", line 584, in wrapper
    return __original_func(*args_, **kwargs_)
  File "/gpfs/fs1/home/mg1646/.conda/envs/variantqc_hailenv/lib/python3.8/site-packages/hail/expr/functions.py", line 351, in cond
    return if_else(condition, consequent, alternate, missing_false)
  File "<decorator-gen-738>", line 2, in if_else
  File "/gpfs/fs1/home/mg1646/.conda/envs/variantqc_hailenv/lib/python3.8/site-packages/hail/typecheck/check.py", line 584, in wrapper
    return __original_func(*args_, **kwargs_)
  File "/gpfs/fs1/home/mg1646/.conda/envs/variantqc_hailenv/lib/python3.8/site-packages/hail/expr/functions.py", line 406, in if_else
    indices, aggregations = unify_all(condition, consequent, alternate)
  File "/gpfs/fs1/home/mg1646/.conda/envs/variantqc_hailenv/lib/python3.8/site-packages/hail/expr/expressions/base_expression.py", line 417, in unify_all
    raise ExpressionException(
hail.expr.expressions.base_expression.ExpressionException: Cannot combine expressions from different source objects.
    Found fields from 2 objects:
        <hail.matrixtable.MatrixTable object at 0x2b7371b28670>: ['locus', 'sa', 'sa', 'GT']
        <hail.matrixtable.MatrixTable object at 0x2b73719a8490>: ['locus', 'sa', 'GT']

Topic		Replies	Views
Cannot combine expressions from different source objects Hail Query & hailctl	1	636	December 16, 2019
ExpressionException: Cannot combine expressions from different source objects Hail Query & hailctl	1	363	April 23, 2021
Error in variant/sample QC. Malformed VCF? Hail Query & hailctl	2	375	March 23, 2021
Issues with sample and variant QC by group Hail Query & hailctl	9	1160	May 14, 2020
Error summary: IllegalArgumentException: requirement failed Hail Query & hailctl	7	1210	February 19, 2022

Error: ExpressionException: Cannot combine expressions from different source objects

Related topics