I’m running into this error:
hail.expr.expressions.base_expression.ExpressionException: Cannot combine expressions from different source objects.
Found fields from 2 objects:
<hail.table.Table object at 0x7f5ff943be10>: ['sample_qc', 'sample_qc', 'sample_qc', 'sample_qc', 'sample_qc', 'sample_qc', 'qc_pop']
<hail.table.Table object at 0x7f5ff943b710>: ['batch.c']
while running this code:
if not args.skip_platform_filter:
logger.info('Annotating platform assignments...')
#platform_ht = hl.read_table(platform_pca_results_ht_path(data_source, freeze))
#sample_qc_ht = sample_qc_ht.annotate(qc_platform=platform_ht[sample_qc_ht.key].qc_platform)
sample_map_ht = hl.read_table(array_sample_map_ht(data_source, freeze))
sample_map = hl.import_table(array_sample_map(freeze), delimiter=',', quote='"')
sample_map = sample_map.key_by(s=sample_map.eid_26041)
sample_qc_ht = sample_qc_ht.annotate(**sample_map_ht[sample_qc_ht.s])
sample_qc_ht = sample_qc_ht.annotate(**sample_map[sample_qc_ht.ukbb_app_26041_id])
strata['qc_platform'] = sample_qc_ht['batch.c']
#strata['qc_platform'] = sample_qc_ht.qc_platform
if not args.skip_population_filter:
logger.info('Annotating population assignments...')
pop_ht = hl.read_table(ancestry_hybrid_ht_path(data_source, freeze))
sample_qc_ht = sample_qc_ht.annotate(qc_pop=pop_ht[sample_qc_ht.key][pop_assignment_method])
strata['qc_pop'] = sample_qc_ht.qc_pop
else:
pop_assignment_method = None
# Make qc_metrics a dict (needs to be a dict for compute_stratified_metrics_filter)
metrics = args.filtering_qc_metrics.split(",")
qc_metrics = {}
for m in metrics:
qc_metrics[m] = sample_qc_ht.sample_qc[f'{m}']
# For each platform and population, aggregate sample QC metrics and calculate the MAD/mean/stdev
logger.info('Flagging samples failing pop/platform-specific sample qc thresholds...')
print(strata)
print(qc_metrics)
pop_platform_filter_ht = compute_stratified_metrics_filter(
sample_qc_ht,
qc_metrics,
strata
)
For reference, compute_stratified_metrics_filter is defined here: https://github.com/macarthur-lab/gnomad_hail/blob/master/utils/sample_qc.py#L654
Why is it saying the tables are different? The numbers it’s outputting are the same. Also, changing the code to be
if not args.skip_platform_filter:
logger.info('Annotating platform assignments...')
#platform_ht = hl.read_table(platform_pca_results_ht_path(data_source, freeze))
#sample_qc_ht = sample_qc_ht.annotate(qc_platform=platform_ht[sample_qc_ht.key].qc_platform)
sample_map_ht = hl.read_table(array_sample_map_ht(data_source, freeze))
sample_map = hl.import_table(array_sample_map(freeze), delimiter=',', quote='"')
sample_map = sample_map.key_by(s=sample_map.eid_26041)
sample_qc_ht = sample_qc_ht.annotate(**sample_map_ht[sample_qc_ht.s])
sample_qc_ht = sample_qc_ht.annotate(**sample_map[sample_qc_ht.ukbb_app_26041_id])
#strata['qc_platform'] = sample_qc_ht['batch.c']
#strata['qc_platform'] = sample_qc_ht.qc_platform
if not args.skip_population_filter:
logger.info('Annotating population assignments...')
pop_ht = hl.read_table(ancestry_hybrid_ht_path(data_source, freeze))
sample_qc_ht = sample_qc_ht.annotate(qc_pop=pop_ht[sample_qc_ht.key][pop_assignment_method])
strata['qc_pop'] = sample_qc_ht.qc_pop
strata['qc_platform'] = sample_qc_ht['batch.c']
else:
pop_assignment_method = None
fixes the error.