Cannot Annotate DB

I am running the following code:

data = db.annotate_rows_db(data, 'dbNSFP_genes','dbNSFP_variants','gnomad_exome_sites')

I am creating the cluster with the following command:

hailctl dataproc start my-cluster --requester-pays-allow-annotation-db --region us-central1

The error I'm getting from this code is:

Hail version: 0.2.47-d9e1f3a110c8
Error summary: HailException: MatrixTable and Table files are directories; path ‘gs://gnomad-public/release/2.1.1/ht/exomes/gnomad.exomes.r2.1.1.sites.ht’ is not a directory

Update to the latest version of Hail. The data may have moved.

2 Likes

We should add a warning to the docs about this. I don't think the annotation database is a feature that should be expected to have forward compatibility, at least for the foreseeable future.

1 Like

I have updated Hail, but the same error is still occurring.

Pasting the full error trace here:

FatalError Traceback (most recent call last)
in
8
9 db = hl.experimental.DB()
---> 10 data = db.annotate_rows_db(data, 'dbNSFP_genes','dbNSFP_variants','gnomad_exome_sites')
11
12 data1 = hl.split_multi_hts(data)

in annotate_rows_db(self, rel, *names)

/opt/conda/default/lib/python3.6/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
612 def wrapper(__original_func, *args, **kwargs):
613 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614 return __original_func(*args_, **kwargs_)
615
616 return wrapper

/opt/conda/default/lib/python3.6/site-packages/hail/experimental/db.py in annotate_rows_db(self, rel, *names)
207 rel = rel.annotate(**{dataset.name: genes.index(rel.key)[dataset.name]})
208 else:
–> 209 indexed_value = dataset.index_compatible_version(rel.key)
210 if isinstance(indexed_value.dtype, hl.tstruct) and len(indexed_value.dtype) == 0:
211 indexed_value = hl.is_defined(indexed_value)

/opt/conda/default/lib/python3.6/site-packages/hail/experimental/db.py in index_compatible_version(self, key_expr)
62 index
63 for index in (version.maybe_index(key_expr, all_matches)
—> 64 for version in self.versions)
65 if index is not None]
66 if len(compatible_indexed_values) == 0:

/opt/conda/default/lib/python3.6/site-packages/hail/experimental/db.py in (.0)
60 all_matches = ‘unique’ not in self.key_properties
61 compatible_indexed_values = [
—> 62 index
63 for index in (version.maybe_index(key_expr, all_matches)
64 for version in self.versions)

/opt/conda/default/lib/python3.6/site-packages/hail/experimental/db.py in (.0)
62 index
63 for index in (version.maybe_index(key_expr, all_matches)
—> 64 for version in self.versions)
65 if index is not None]
66 if len(compatible_indexed_values) == 0:

/opt/conda/default/lib/python3.6/site-packages/hail/experimental/db.py in maybe_index(self, indexer_key_expr, all_matches)
27
28 def maybe_index(self, indexer_key_expr, all_matches):
—> 29 return hl.read_table(self.url)._maybe_flexindex_table_by_expr(
30 indexer_key_expr, all_matches=all_matches)
31

in read_table(path, _intervals, _filter_intervals)

/opt/conda/default/lib/python3.6/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
612 def wrapper(__original_func, *args, **kwargs):
613 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614 return __original_func(*args_, **kwargs_)
615
616 return wrapper

/opt/conda/default/lib/python3.6/site-packages/hail/methods/impex.py in read_table(path, _intervals, _filter_intervals)
2431 :class:`.Table`
2432 """
-> 2433 for rg_config in Env.backend().load_references_from_dataset(path):
2434 hl.ReferenceGenome._from_config(rg_config)
2435

/opt/conda/default/lib/python3.6/site-packages/hail/backend/spark_backend.py in load_references_from_dataset(self, path)
348
349 def load_references_from_dataset(self, path):
–> 350 return json.loads(Env.hail().variant.ReferenceGenome.fromHailDataset(self.fs._jfs, path))
351
352 def from_fasta_file(self, name, fasta_file, index_file, x_contigs, y_contigs, mt_contigs, par):

/usr/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:

/opt/conda/default/lib/python3.6/site-packages/hail/backend/spark_backend.py in deco(*args, **kwargs)
40 raise FatalError('%s\n\nJava stack trace:\n%s\n'
41 'Hail version: %s\n'
---> 42 'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
43 except pyspark.sql.utils.CapturedException as e:
44 raise FatalError('%s\n\nJava stack trace:\n%s\n'

FatalError: HailException: MatrixTable and Table files are directories; path ‘gs://gnomad-public/release/2.1.1/ht/exomes/gnomad.exomes.r2.1.1.sites.ht’ is not a directory

Java stack trace:
is.hail.utils.HailException: MatrixTable and Table files are directories; path ‘gs://gnomad-public/release/2.1.1/ht/exomes/gnomad.exomes.r2.1.1.sites.ht’ is not a directory
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:9)
at is.hail.utils.package$.fatal(package.scala:77)
at is.hail.expr.ir.RelationalSpec$.readMetadata(AbstractMatrixTableSpec.scala:32)
at is.hail.expr.ir.RelationalSpec$.readReferences(AbstractMatrixTableSpec.scala:66)
at is.hail.variant.ReferenceGenome$.fromHailDataset(ReferenceGenome.scala:587)
at is.hail.variant.ReferenceGenome.fromHailDataset(ReferenceGenome.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)

Hail version: 0.2.57-582b2e31b8bd
Error summary: HailException: MatrixTable and Table files are directories; path ‘gs://gnomad-public/release/2.1.1/ht/exomes/gnomad.exomes.r2.1.1.sites.ht’ is not a directory

I think the gnomad data has moved to a different requester-pays bucket. We’ll need to update the annotation database metadata to point to the new location.

Could you kindly tell me how long that will take?

Shouldn’t take too long, I’d guess by end of week. You can also access the gnomad data directly and annotate as needed: https://gnomad.broadinstitute.org/downloads