HailException: invalid interval expression

Hi,

When I try to filter my mt file like this
ds = hl.filter_intervals(vds, [hl.parse_locus_interval(x) for x in ['20-22']], keep=True)

I got this error: HailException: invalid interval expression: '20-22'. The genome reference for my dataset is GRCh38, so I initialized Hail like this: hl.init(default_reference='GRCh38'). I did this filter on other datasets and I didn't get this error (the others had GRCh37 as the reference genome).

FatalError Traceback (most recent call last)
in ()
----> 1 vds = hl.filter_intervals(vds, [hl.parse_locus_interval(x) for x in ['20-22']], keep=True)

/home/hail/hail.zip/hail/typecheck/check.py in wrapper(*args, **kwargs)
545 def wrapper(*args, **kwargs):
546 args_, kwargs_ = check_all(f, args, kwargs, checkers, is_method=is_method)
–> 547 return f(*args_, **kwargs_)
548
549 update_wrapper(wrapper, f)

/home/hail/hail.zip/hail/methods/misc.py in filter_intervals(ds, intervals, keep)
314 return interval
315
–> 316 intervals = [wrap_input(x)._jrep for x in intervals.value]
317 jmt = Env.hail().methods.FilterIntervals.apply(ds._jvds, intervals, keep)
318 return MatrixTable(jmt)

/home/hail/hail.zip/hail/expr/expressions/base_expression.py in value(self)
784
785 “”"
–> 786 return hl.eval_expr(self)
787
788 def _aggregation_method(self):

/home/hail/hail.zip/hail/typecheck/check.py in wrapper(*args, **kwargs)
545 def wrapper(*args, **kwargs):
546 args_, kwargs_ = check_all(f, args, kwargs, checkers, is_method=is_method)
–> 547 return f(*args_, **kwargs_)
548
549 update_wrapper(wrapper, f)

/home/hail/hail.zip/hail/expr/expressions/expression_utils.py in eval_expr(expression)
135 Result of evaluating expression.
136 “”"
–> 137 return eval_expr_typed(expression)[0]
138
139

/home/hail/hail.zip/hail/typecheck/check.py in wrapper(*args, **kwargs)
545 def wrapper(*args, **kwargs):
546 args_, kwargs_ = check_all(f, args, kwargs, checkers, is_method=is_method)
–> 547 return f(*args_, **kwargs_)
548
549 update_wrapper(wrapper, f)

/home/hail/hail.zip/hail/expr/expressions/expression_utils.py in eval_expr_typed(expression)
169 analyze(‘eval_expr_typed’, expression, Indices(expression._indices.source))
170
–> 171 return expression.collect()[0], expression.dtype
172
173

/home/hail/hail.zip/hail/expr/expressions/base_expression.py in collect(self)
767 uid = Env.get_uid()
768 t = self._to_table(uid)
–> 769 return [r[uid] for r in t._select(“collect”, None, hl.struct(**{uid: t[uid]})).collect()]
770
771 @property

/home/hail/hail.zip/hail/table.py in collect(self)
1590 List of rows.
1591 “”"
-> 1592 return hl.tarray(self.row.dtype)._from_json(self._jt.collectJSON())
1593
1594 def describe(self):

/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in call(self, *args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:

/home/hail/hail.zip/hail/utils/java.py in deco(*args, **kwargs)
198 raise FatalError(’%s\n\nJava stack trace:\n%s\n’
199 ‘Hail version: %s\n’
–> 200 ‘Error summary: %s’ % (deepest, full, hail.version, deepest)) from None
201 except pyspark.sql.utils.CapturedException as e:
202 raise FatalError(’%s\n\nJava stack trace:\n%s\n’

FatalError: HailException: invalid interval expression: `20-22’:

Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 6.0 failed 4 times, most recent failure: Lost task 0.3 in stage 6.0 (TID 21, hail120-w-3.c.avl-hail-ines.internal, executor 3): is.hail.utils.HailException: invalid interval expression: `20-22’:
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:9)
at is.hail.utils.package$.fatal(package.scala:26)
at is.hail.expr.Parser$.parseLocusInterval(Parser.scala:133)
at is.hail.variant.Locus$.parseInterval(Locus.scala:53)
at is.hail.codegen.generated.C45.method1(Unknown Source)
at is.hail.codegen.generated.C45.apply(Unknown Source)
at is.hail.codegen.generated.C45.apply(Unknown Source)
at is.hail.expr.ir.TableMapRows$$anonfun$24$$anonfun$apply$12.apply(TableIR.scala:504)
at is.hail.expr.ir.TableMapRows$$anonfun$24$$anonfun$apply$12.apply(TableIR.scala:493)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
at scala.collection.AbstractIterator.to(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1336)
at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:936)
at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:936)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1517)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1505)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1504)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1504)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1732)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1687)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1676)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2029)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2050)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2069)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2094)
at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:936)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.collect(RDD.scala:935)
at is.hail.table.Table.collect(Table.scala:655)
at is.hail.table.Table.collectJSON(Table.scala:658)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)is.hail.utils.HailException: invalid interval expression: `20-22’:
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:9)
at is.hail.utils.package$.fatal(package.scala:26)
at is.hail.expr.Parser$.parseLocusInterval(Parser.scala:133)
at is.hail.variant.Locus$.parseInterval(Locus.scala:53)
at is.hail.codegen.generated.C45.method1(Unknown Source)
at is.hail.codegen.generated.C45.apply(Unknown Source)
at is.hail.codegen.generated.C45.apply(Unknown Source)
at is.hail.expr.ir.TableMapRows$$anonfun$24$$anonfun$apply$12.apply(TableIR.scala:504)
at is.hail.expr.ir.TableMapRows$$anonfun$24$$anonfun$apply$12.apply(TableIR.scala:493)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$11.next(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
at scala.collection.AbstractIterator.to(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1336)
at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:936)
at org.apache.spark.rdd.RDD$$anonfun$collect$1$$anonfun$13.apply(RDD.scala:936)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
at org.apache.spark.scheduler.Task.run(Task.scala:108)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

Hail version: devel-7d33e65d8423
Error summary: HailException: invalid interval expression: `20-22’:

Thanks,

Laura

Sorry these error messages aren’t great – the problem is GREATLY complicated by someone’s bright idea to allow contig names to include all sorts of things like punctuation (: or - are totally legal), meaning our interval parser is pretty complex.

The problem here is that neither 20 nor 22 is a contig in GRCh38 — the interval should be written as chr20-chr22, since GRCh38 contig names carry the "chr" prefix.