...
rg37 = hl.get_reference('GRCh37')
rg38 = hl.get_reference('GRCh38')
rg37.add_liftover('/somepath/grch37_to_grch38.over.chain.gz', rg38)
temp['chr'] = temp['chr'].apply(lambda x: str(int(x)) if pd.notna(x) else np.nan)
temp['location'] = temp['location'].apply(lambda x: int(x) if pd.notna(x) else np.nan)
valid_rows = temp.dropna(subset=['chr', 'location']).copy()
if not valid_rows.empty:
try:
ht = hl.Table.from_pandas(valid_rows)
print("Loading complete")
print("Sample data:", ht.show(5))
def safe_locus(chr_val, loc_val):
try:
return hl.locus(hl.str(chr_val), hl.int(loc_val), reference_genome='GRCh37')
except Exception as e:
print(f"Error at chr: {chr_val}, location: {loc_val}")
raise e
ht = ht.annotate(locus=safe_locus(ht.chr, ht.location))
print("19 locus form transition complete")
ht = ht.annotate(new_locus=hl.liftover(ht.locus, 'GRCh38'))
print("37 locus form transition complete")
ht = ht.filter(hl.is_defined(ht.new_locus))
ht = ht.annotate(
chr_38=ht.new_locus.contig,
location_38=ht.new_locus.position
)
result_df = ht.to_pandas()
result_df['chr_38'] = result_df['chr_38'].astype(str)
result_df['location_38'] = result_df['location_38'].astype(float)
except Exception as e:
print("Error during processing:", e)
I am getting an error “cannot impute array elements”. As “Loading complete” is not printing, I believe the step “ht = hl.Table.from_pandas(valid_rows)” is not being completed.
valid_rows consist of 3 million rows.
When I tried with sample data where valid_rows consist of 5 rows, the code ran successfully and I obtained hg38 locus and there was no error. What are the things I need to further look into?