How to extract sample IDs from MT

Hello,

I am trying to use HAIL to quickly annotate VCF files using VEP/LOFTEE and filter for LOF variant carriers. My code works great up until I need to extract the Subject IDs from the MT and then I have faced a host of issues.

start = time.time()

# Step 2: Read VCF files
vcf_files = [
    "file:///mnt/project/file1.vcf.gz",
    "file:///mnt/project/file2.vcf.gz"
]

mt = hl.import_vcf(
    vcf_files,
    force_bgz=True,
    reference_genome="GRCh38",
    min_partitions=100
)

end = time.time()
print(f"Step 2 took {end - start:.4f} seconds")


# Step 3: Normalize
start = time.time()
mt = hl.split_multi_hts(mt)

end = time.time()
print(f"Step 3 took {end - start:.4f} seconds")


# Step 4: Annotate with VEP
start = time.time()
mt = hl.vep(mt, "file:///mnt/project/vep-GRCh38.json")

end = time.time()
print(f"Step 4 took {end - start:.4f} seconds")


# Step 5: Filter for LOF in target gene
start = time.time()
lof_consequences_hl = hl.set([
    "transcript_ablation", "splice_acceptor_variant", "splice_donor_variant",
    "stop_gained", "frameshift_variant", "start_lost", "stop_lost"
])


mt = mt.filter_rows(
    hl.any(lambda csq: (
        (csq.gene_symbol == "PCSK9") &
        hl.any(lambda term: lof_consequences_hl.contains(term), csq.consequence_terms)
    ), mt.vep.transcript_consequences)
)

end = time.time()
print(f"Step 5 took {end - start:.4f} seconds")


# Step 6: Annotate each sample with a flag if they carry any non-ref genotype
start = time.time()
mt = mt.annotate_cols(
    is_lof_carrier = hl.agg.any(mt.GT.is_non_ref())
)

end = time.time()
print(f"Step 6 took {end - start:.4f} seconds")


# Step 7: Collect the sample IDs of carriers
start = time.time()

lof_mt = mt.filter_cols(mt.is_lof_carrier)
# This doesn't work lol (seems to run forever)
carrier_ids = lof_mt.s.collect()

I’ve tried a bunch of .collect(). .select() but I can’t seem to figure this out. I assume I am missing something blatantly obvious but I’ve been stuck on this last part for a while.

Any assistance would be greatly appreciated!