Cannot run VEP annotations (IOException: error=2, No such file or directory)

I’m trying to run Hail’s VEP annotation on a compute cluster and got the following error.

File "/opt/conda/lib/python3.7/site-packages/hail/methods/qc.py", line 621, in vep
    'tolerateParseError': tolerate_parse_error})).persist()
  File "<decorator-gen-1129>", line 2, in persist
  File "/opt/conda/lib/python3.7/site-packages/hail/typecheck/check.py", line 577, in wrapper
    return __original_func(*args_, **kwargs_)
  File "/opt/conda/lib/python3.7/site-packages/hail/table.py", line 1870, in persist
    return Env.backend().persist_table(self, storage_level)
  File "/opt/conda/lib/python3.7/site-packages/hail/backend/spark_backend.py", line 290, in persist_table
    return Table._from_java(self._jbackend.pyPersistTable(storage_level, self._to_java_table_ir(t._tir)))
  File "/opt/conda/lib/python3.7/site-packages/py4j/java_gateway.py", line 1305, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "/opt/conda/lib/python3.7/site-packages/hail/backend/py4j_backend.py", line 31, in deco
    'Error summary: %s' % (deepest, full, hail.__version__, deepest), error_id) from None
hail.utils.java.FatalError: IOException: error=2, No such file or directory

Java stack trace:
java.io.IOException: Cannot run program "vep": error=2, No such file or directory
	at java.lang.ProcessBuilder.start(ProcessBuilder.java:1048)
	at is.hail.utils.richUtils.RichIterator$.pipe$extension(RichIterator.scala:87)
	at is.hail.methods.VEP$.getCSQHeaderDefinition(VEP.scala:86)
	at is.hail.methods.VEP.execute(VEP.scala:141)
	at is.hail.expr.ir.TableToTableApply.execute(TableIR.scala:2930)
	at is.hail.expr.ir.TableIR.analyzeAndExecute(TableIR.scala:58)
	at is.hail.expr.ir.Interpret$.apply(Interpret.scala:27)
	at is.hail.backend.spark.SparkBackend.$anonfun$pyPersistTable$2(SparkBackend.scala:502)
	at is.hail.backend.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:47)
	at is.hail.utils.package$.using(package.scala:638)
	at is.hail.backend.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:47)
	at is.hail.utils.package$.using(package.scala:638)
	at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
	at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:46)
	at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:275)
	at is.hail.backend.spark.SparkBackend.$anonfun$pyPersistTable$1(SparkBackend.scala:501)
	at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
	at is.hail.utils.ExecutionTimer$.logTime(ExecutionTimer.scala:59)
	at is.hail.backend.spark.SparkBackend.pyPersistTable(SparkBackend.scala:493)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)

java.io.IOException: error=2, No such file or directory
	at java.lang.UNIXProcess.forkAndExec(Native Method)
	at java.lang.UNIXProcess.<init>(UNIXProcess.java:247)
	at java.lang.ProcessImpl.start(ProcessImpl.java:134)
	at java.lang.ProcessBuilder.start(ProcessBuilder.java:1029)
	at is.hail.utils.richUtils.RichIterator$.pipe$extension(RichIterator.scala:87)
	at is.hail.methods.VEP$.getCSQHeaderDefinition(VEP.scala:86)
	at is.hail.methods.VEP.execute(VEP.scala:141)
	at is.hail.expr.ir.TableToTableApply.execute(TableIR.scala:2930)
	at is.hail.expr.ir.TableIR.analyzeAndExecute(TableIR.scala:58)
	at is.hail.expr.ir.Interpret$.apply(Interpret.scala:27)
	at is.hail.backend.spark.SparkBackend.$anonfun$pyPersistTable$2(SparkBackend.scala:502)
	at is.hail.backend.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:47)
	at is.hail.utils.package$.using(package.scala:638)
	at is.hail.backend.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:47)
	at is.hail.utils.package$.using(package.scala:638)
	at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
	at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:46)
	at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:275)
	at is.hail.backend.spark.SparkBackend.$anonfun$pyPersistTable$1(SparkBackend.scala:501)
	at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
	at is.hail.utils.ExecutionTimer$.logTime(ExecutionTimer.scala:59)
	at is.hail.backend.spark.SparkBackend.pyPersistTable(SparkBackend.scala:493)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)




Hail version: 0.2.79-f141af259254
Error summary: IOException: error=2, No such file or directory

I’ve looked through two posts from 2019 about this issue, but the solutions posted there did not solve my problem. For context, our compute cluster uses Docker to run scripts (so Hail is installed inside the Docker image I’m using), and I don’t have sudo permissions on the cluster. Thanks!

What kind of cluster are you using? Unless you’re using a hailctl dataproc cluster, you need to install VEP yourself and ensure all the associated files are present where VEP expects them to be. The error "Cannot run program \"vep\"" means the `vep` executable was not found on the workers' PATH. Basically, you need to ensure that running `vep` yourself from the command line, outside of Hail, works correctly; then Hail can invoke that program internally.