Error Applying VEP

Hail version: 0.2.34-914bd8a10ca2
Error summary: GoogleJsonResponseException: 400 Bad Request
{
"code" : 400,
"errors" : [ {
"domain" : "global",
"message" : "Bucket is requester pays bucket but no user project provided.",
"reason" : "required"
} ],
"message" : "Bucket is requester pays bucket but no user project provided."
}

While using hl.vep I'm facing the above error.
Kindly help.

MY COMMAND: hailctl dataproc --beta start hailpy1 --vep GRCh37 --optional-components=ANACONDA,JUPYTER --enable-component-gateway --bucket analysts --project analysts --region us-central1 --num-preemptible-workers 20 --initialization-actions 'gs://mybucket/init_notebook.py'

THE WHOLE TRACEBACK:

FatalError Traceback (most recent call last)
<ipython-input> in <module>
----> 1 vepData = hl.vep(data1, 'gs://hail-us-vep/vep85-loftee-gcloud.json')
2 vepData.show()

<decorator-gen> in vep(dataset, config, block_size, name, csq)

/opt/conda/default/lib/python3.6/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
612 def wrapper(__original_func, *args, **kwargs):
613 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614 return __original_func(*args_, **kwargs_)
615
616 return wrapper

/opt/conda/default/lib/python3.6/site-packages/hail/methods/qc.py in vep(dataset, config, block_size, name, csq)
603 'config': config,
604 'csq': csq,
--> 605 'blockSize': block_size})).persist()
606
607 if csq:

/opt/conda/default/lib/python3.6/site-packages/hail/table.py in __init__(self, tir)
340
341 self._tir = tir
--> 342 self._type = self._tir.typ
343
344 self._row_axis = 'row'

/opt/conda/default/lib/python3.6/site-packages/hail/ir/base_ir.py in typ(self)
291 def typ(self):
292 if self._type is None:
--> 293 self._compute_type()
294 assert self._type is not None, self
295 return self._type

/opt/conda/default/lib/python3.6/site-packages/hail/ir/table_ir.py in _compute_type(self)
555 else:
556 assert name in ('VEP', 'Nirvana'), name
--> 557 self._type = Env.backend().table_type(self)
558
559

/opt/conda/default/lib/python3.6/site-packages/hail/backend/spark_backend.py in table_type(self, tir)
305
306 def table_type(self, tir):
--> 307 jir = self._to_java_table_ir(tir)
308 return ttable._from_java(jir.typ())
309

/opt/conda/default/lib/python3.6/site-packages/hail/backend/spark_backend.py in _to_java_table_ir(self, ir)
283
284 def _to_java_table_ir(self, ir):
--> 285 return self._to_java_ir(ir, self._parse_table_ir)
286
287 def _to_java_matrix_ir(self, ir):

/opt/conda/default/lib/python3.6/site-packages/hail/backend/spark_backend.py in _to_java_ir(self, ir, parse)
276 r = CSERenderer(stop_at_jir=True)
277 # FIXME parse should be static
--> 278 ir._jir = parse(r(ir), ir_map=r.jirs)
279 return ir._jir
280

/opt/conda/default/lib/python3.6/site-packages/hail/backend/spark_backend.py in _parse_table_ir(self, code, ref_map, ir_map)
251
252 def _parse_table_ir(self, code, ref_map={}, ir_map={}):
--> 253 return self._jbackend.parse_table_ir(code, ref_map, ir_map)
254
255 def _parse_matrix_ir(self, code, ref_map={}, ir_map={}):

/usr/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:

/opt/conda/default/lib/python3.6/site-packages/hail/backend/spark_backend.py in deco(*args, **kwargs)
39 raise FatalError('%s\n\nJava stack trace:\n%s\n'
40 'Hail version: %s\n'
---> 41 'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
42 except pyspark.sql.utils.CapturedException as e:
43 raise FatalError('%s\n\nJava stack trace:\n%s\n'

FatalError: GoogleJsonResponseException: 400 Bad Request
{
"code" : 400,
"errors" : [ {
"domain" : "global",
"message" : "Bucket is requester pays bucket but no user project provided.",
"reason" : "required"
} ],
"message" : "Bucket is requester pays bucket but no user project provided."
}

Java stack trace:
java.io.IOException: Error accessing gs://hail-us-vep/vep85-loftee-gcloud.json
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.getObject(GoogleCloudStorageImpl.java:1945)
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.getItemInfo(GoogleCloudStorageImpl.java:1851)
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.open(GoogleCloudStorageImpl.java:629)
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem.open(GoogleCloudStorageFileSystem.java:322)
at com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream.<init>(GoogleHadoopFSInputStream.java:77)
at com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.open(GoogleHadoopFileSystemBase.java:740)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:914)
at is.hail.io.fs.HadoopFS.openNoCompression(HadoopFS.scala:68)
at is.hail.io.fs.FS$class.open(FS.scala:139)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:56)
at is.hail.io.fs.FS$class.open(FS.scala:148)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:56)
at is.hail.methods.VEP$.readConfiguration(VEP.scala:34)
at is.hail.methods.VEP$.apply(VEP.scala:102)
at is.hail.methods.VEP$.fromJValue(VEP.scala:113)
at is.hail.expr.ir.functions.RelationalFunctions$.extractTo(RelationalFunctions.scala:137)
at is.hail.expr.ir.functions.RelationalFunctions$.lookupTableToTable(RelationalFunctions.scala:147)
at is.hail.expr.ir.IRParser$.table_ir_1(Parser.scala:1397)
at is.hail.expr.ir.IRParser$.table_ir(Parser.scala:1267)
at is.hail.expr.ir.IRParser$$anonfun$parse_table_ir$1.apply(Parser.scala:1736)
at is.hail.expr.ir.IRParser$$anonfun$parse_table_ir$1.apply(Parser.scala:1736)
at is.hail.expr.ir.IRParser$.parse(Parser.scala:1725)
at is.hail.expr.ir.IRParser$.parse_table_ir(Parser.scala:1736)
at is.hail.backend.spark.SparkBackend$$anonfun$parse_table_ir$1.apply(SparkBackend.scala:512)
at is.hail.backend.spark.SparkBackend$$anonfun$parse_table_ir$1.apply(SparkBackend.scala:511)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:20)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:18)
at is.hail.utils.package$.using(package.scala:601)
at is.hail.annotations.Region$.scoped(Region.scala:18)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:18)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:229)
at is.hail.backend.spark.SparkBackend.parse_table_ir(SparkBackend.scala:511)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)

com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.json.GoogleJsonResponseException: 400 Bad Request
{
"code" : 400,
"errors" : [ {
"domain" : "global",
"message" : "Bucket is requester pays bucket but no user project provided.",
"reason" : "required"
} ],
"message" : "Bucket is requester pays bucket but no user project provided."
}
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.json.GoogleJsonResponseException.from(GoogleJsonResponseException.java:150)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.services.json.AbstractGoogleJsonClientRequest.newExceptionOnError(AbstractGoogleJsonClientRequest.java:113)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.services.json.AbstractGoogleJsonClientRequest.newExceptionOnError(AbstractGoogleJsonClientRequest.java:40)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.services.AbstractGoogleClientRequest$1.interceptResponse(AbstractGoogleClientRequest.java:401)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.http.HttpRequest.execute(HttpRequest.java:1097)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:499)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:432)
at com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.services.AbstractGoogleClientRequest.execute(AbstractGoogleClientRequest.java:549)
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.getObject(GoogleCloudStorageImpl.java:1939)
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.getItemInfo(GoogleCloudStorageImpl.java:1851)
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.open(GoogleCloudStorageImpl.java:629)
at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem.open(GoogleCloudStorageFileSystem.java:322)
at com.google.cloud.hadoop.fs.gcs.GoogleHadoopFSInputStream.<init>(GoogleHadoopFSInputStream.java:77)
at com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.open(GoogleHadoopFileSystemBase.java:740)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:914)
at is.hail.io.fs.HadoopFS.openNoCompression(HadoopFS.scala:68)
at is.hail.io.fs.FS$class.open(FS.scala:139)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:56)
at is.hail.io.fs.FS$class.open(FS.scala:148)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:56)
at is.hail.methods.VEP$.readConfiguration(VEP.scala:34)
at is.hail.methods.VEP$.apply(VEP.scala:102)
at is.hail.methods.VEP$.fromJValue(VEP.scala:113)
at is.hail.expr.ir.functions.RelationalFunctions$.extractTo(RelationalFunctions.scala:137)
at is.hail.expr.ir.functions.RelationalFunctions$.lookupTableToTable(RelationalFunctions.scala:147)
at is.hail.expr.ir.IRParser$.table_ir_1(Parser.scala:1397)
at is.hail.expr.ir.IRParser$.table_ir(Parser.scala:1267)
at is.hail.expr.ir.IRParser$$anonfun$parse_table_ir$1.apply(Parser.scala:1736)
at is.hail.expr.ir.IRParser$$anonfun$parse_table_ir$1.apply(Parser.scala:1736)
at is.hail.expr.ir.IRParser$.parse(Parser.scala:1725)
at is.hail.expr.ir.IRParser$.parse_table_ir(Parser.scala:1736)
at is.hail.backend.spark.SparkBackend$$anonfun$parse_table_ir$1.apply(SparkBackend.scala:512)
at is.hail.backend.spark.SparkBackend$$anonfun$parse_table_ir$1.apply(SparkBackend.scala:511)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:20)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:18)
at is.hail.utils.package$.using(package.scala:601)
at is.hail.annotations.Region$.scoped(Region.scala:18)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:18)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:229)
at is.hail.backend.spark.SparkBackend.parse_table_ir(SparkBackend.scala:511)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)

Hail version: 0.2.46-6ef64c08b000
Error summary: GoogleJsonResponseException: 400 Bad Request
{
"code" : 400,
"errors" : [ {
"domain" : "global",
"message" : "Bucket is requester pays bucket but no user project provided.",
"reason" : "required"
} ],
"message" : "Bucket is requester pays bucket but no user project provided."
}

Update to the latest Hail version to resolve this.
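
For reference, newer Hail versions also let hailctl configure requester-pays access at cluster start. A minimal sketch, assuming the requester-pays flags available in recent hailctl releases (check hailctl dataproc start --help for your version):

# Allow reads from the requester-pays VEP bucket, billing them to your project
# (flag name assumed from recent hailctl releases)
hailctl dataproc start hailpy1 \
    --vep GRCh37 \
    --project analysts \
    --region us-central1 \
    --requester-pays-allow-buckets hail-us-vep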

Isn't 0.2.46 the latest version?
The traceback is from Hail 0.2.46, which is the latest.

I was not using requester pays earlier. After adding the requester pays flag, I'm now facing the following error when applying VEP:


FatalError Traceback (most recent call last)
/opt/conda/default/lib/python3.6/site-packages/IPython/core/formatters.py in __call__(self, obj)
700 type_pprinters=self.type_printers,
701 deferred_pprinters=self.deferred_printers)
--> 702 printer.pretty(obj)
703 printer.flush()
704 return stream.getvalue()

/opt/conda/default/lib/python3.6/site-packages/IPython/lib/pretty.py in pretty(self, obj)
392 if cls is not object
393 and callable(cls.__dict__.get('__repr__')):
--> 394 return _repr_pprint(obj, self, cycle)
395
396 return _default_pprint(obj, self, cycle)

/opt/conda/default/lib/python3.6/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
682 """A pprint that just redirects to the normal repr function."""
683 # Find newlines and replace them with p.break_()
--> 684 output = repr(obj)
685 lines = output.splitlines()
686 with p.group():

/opt/conda/default/lib/python3.6/site-packages/hail/table.py in __repr__(self)
1267
1268 def __repr__(self):
-> 1269 return self.__str__()
1270
1271 def data(self):

/opt/conda/default/lib/python3.6/site-packages/hail/table.py in __str__(self)
1264
1265 def __str__(self):
-> 1266 return self._ascii_str()
1267
1268 def __repr__(self):

/opt/conda/default/lib/python3.6/site-packages/hail/table.py in _ascii_str(self)
1290 return s
1291
-> 1292 rows, has_more, dtype = self.data()
1293 fields = list(dtype)
1294 trunc_fields = [trunc(f) for f in fields]

/opt/conda/default/lib/python3.6/site-packages/hail/table.py in data(self)
1274 row_dtype = t.row.dtype
1275 t = t.select(**{k: hl._showstr(v) for (k, v) in t.row.items()})
-> 1276 rows, has_more = t._take_n(self.n)
1277 self._data = (rows, has_more, row_dtype)
1278 return self._data

/opt/conda/default/lib/python3.6/site-packages/hail/table.py in _take_n(self, n)
1421 has_more = False
1422 else:
-> 1423 rows = self.take(n + 1)
1424 has_more = len(rows) > n
1425 rows = rows[:n]

<decorator-gen> in take(self, n, _localize)

/opt/conda/default/lib/python3.6/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
612 def wrapper(__original_func, *args, **kwargs):
613 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614 return __original_func(*args_, **kwargs_)
615
616 return wrapper

/opt/conda/default/lib/python3.6/site-packages/hail/table.py in take(self, n, _localize)
2085 “”"
2086
-> 2087 return self.head(n).collect(_localize)
2088
2089 @typecheck_method(n=int)

<decorator-gen> in collect(self, _localize)

/opt/conda/default/lib/python3.6/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
612 def wrapper(__original_func, *args, **kwargs):
613 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614 return __original_func(*args_, **kwargs_)
615
616 return wrapper

/opt/conda/default/lib/python3.6/site-packages/hail/table.py in collect(self, _localize)
1884 e = construct_expr(rows_ir, hl.tarray(t.row.dtype))
1885 if _localize:
-> 1886 return Env.backend().execute(e._ir)
1887 else:
1888 return e

/opt/conda/default/lib/python3.6/site-packages/hail/backend/spark_backend.py in execute(self, ir, timed)
294 jir = self._to_java_value_ir(ir)
295 # print(self._hail_package.expr.ir.Pretty.apply(jir, True, -1))
--> 296 result = json.loads(self._jhc.backend().executeJSON(jir))
297 value = ir.typ._from_json(result['value'])
298 timings = result['timings']

/usr/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:

/opt/conda/default/lib/python3.6/site-packages/hail/backend/spark_backend.py in deco(*args, **kwargs)
39 raise FatalError('%s\n\nJava stack trace:\n%s\n'
40 'Hail version: %s\n'
---> 41 'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
42 except pyspark.sql.utils.CapturedException as e:
43 raise FatalError('%s\n\nJava stack trace:\n%s\n'

FatalError: IOException: error=2, No such file or directory

Java stack trace:
java.lang.RuntimeException: error while applying lowering 'InterpretNonCompilable'
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:26)
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:18)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:18)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:28)
at is.hail.backend.spark.SparkBackend.is$hail$backend$spark$SparkBackend$$_execute(SparkBackend.scala:317)
at is.hail.backend.spark.SparkBackend$$anonfun$execute$1.apply(SparkBackend.scala:304)
at is.hail.backend.spark.SparkBackend$$anonfun$execute$1.apply(SparkBackend.scala:303)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:20)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:18)
at is.hail.utils.package$.using(package.scala:601)
at is.hail.annotations.Region$.scoped(Region.scala:18)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:18)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:229)
at is.hail.backend.spark.SparkBackend.execute(SparkBackend.scala:303)
at is.hail.backend.spark.SparkBackend.executeJSON(SparkBackend.scala:323)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)

org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 6.0 failed 20 times, most recent failure: Lost task 0.19 in stage 6.0 (TID 63, hailpy1-sw-rvrl.us-central1-c.c.cncd-analysts.internal, executor 2): java.io.IOException: Cannot run program "/vep": error=2, No such file or directory
at java.lang.ProcessBuilder.start(ProcessBuilder.java:1048)
at is.hail.utils.richUtils.RichIterator$.pipe$extension(RichIterator.scala:55)
at is.hail.methods.VEP$$anonfun$7$$anonfun$apply$4.apply(VEP.scala:176)
at is.hail.methods.VEP$$anonfun$7$$anonfun$apply$4.apply(VEP.scala:172)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.utils.richUtils.RichContextRDD$$anonfun$cleanupRegions$1$$anon$1.hasNext(RichContextRDD.scala:31)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.utils.richUtils.RichContextRDD$$anonfun$cleanupRegions$1$$anon$1.hasNext(RichContextRDD.scala:31)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:299)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:308)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at is.hail.utils.richUtils.RichContextRDD$$anonfun$cleanupRegions$1$$anon$1.hasNext(RichContextRDD.scala:31)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:299)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:308)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at is.hail.sparkextras.ContextRDD.iterator(ContextRDD.scala:378)
at is.hail.sparkextras.RepartitionedOrderedRDD2$$anonfun$compute$1$$anonfun$apply$3.apply(RepartitionedOrderedRDD2.scala:60)
at is.hail.sparkextras.RepartitionedOrderedRDD2$$anonfun$compute$1$$anonfun$apply$3.apply(RepartitionedOrderedRDD2.scala:59)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
at scala.collection.Iterator$$anon$18.hasNext(Iterator.scala:762)
at scala.collection.Iterator$$anon$16.hasNext(Iterator.scala:598)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.io.RichContextRDDLong$$anonfun$boundary$extension$2$$anon$1.hasNext(RichContextRDDRegionValue.scala:188)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$1.hasNext(Iterator.scala:1002)
at is.hail.utils.richUtils.RichIterator$$anon$5.isValid(RichIterator.scala:30)
at is.hail.utils.StagingIterator.isValid(FlipbookIterator.scala:48)
at is.hail.utils.FlipbookIterator$$anon$9.setValue(FlipbookIterator.scala:331)
at is.hail.utils.FlipbookIterator$$anon$9.<init>(FlipbookIterator.scala:344)
at is.hail.utils.FlipbookIterator.leftJoinDistinct(FlipbookIterator.scala:323)
at is.hail.annotations.OrderedRVIterator.leftJoinDistinct(OrderedRVIterator.scala:53)
at is.hail.rvd.KeyedRVD$$anonfun$orderedLeftJoinDistinct$1.apply(KeyedRVD.scala:151)
at is.hail.rvd.KeyedRVD$$anonfun$orderedLeftJoinDistinct$1.apply(KeyedRVD.scala:148)
at is.hail.sparkextras.ContextRDD$$anonfun$czipPartitions$1$$anonfun$apply$24.apply(ContextRDD.scala:305)
at is.hail.sparkextras.ContextRDD$$anonfun$czipPartitions$1$$anonfun$apply$24.apply(ContextRDD.scala:305)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitions$1$$anonfun$apply$9$$anonfun$apply$10.apply(ContextRDD.scala:208)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitions$1$$anonfun$apply$9$$anonfun$apply$10.apply(ContextRDD.scala:208)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.utils.richUtils.RichContextRDD$$anonfun$cleanupRegions$1$$anon$1.hasNext(RichContextRDD.scala:31)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.utils.package$.getIteratorSizeWithMaxN(package.scala:388)
at is.hail.rvd.RVD$$anonfun$13$$anonfun$apply$10.apply(RVD.scala:526)
at is.hail.rvd.RVD$$anonfun$13$$anonfun$apply$10.apply(RVD.scala:526)
at is.hail.sparkextras.ContextRDD$$anonfun$runJob$1.apply(ContextRDD.scala:355)
at is.hail.sparkextras.ContextRDD$$anonfun$runJob$1.apply(ContextRDD.scala:353)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

java.io.IOException: error=2, No such file or directory
at java.lang.UNIXProcess.forkAndExec(Native Method)
at java.lang.UNIXProcess.<init>(UNIXProcess.java:247)
at java.lang.ProcessImpl.start(ProcessImpl.java:134)
at java.lang.ProcessBuilder.start(ProcessBuilder.java:1029)
at is.hail.utils.richUtils.RichIterator$.pipe$extension(RichIterator.scala:55)
at is.hail.methods.VEP$$anonfun$7$$anonfun$apply$4.apply(VEP.scala:176)
at is.hail.methods.VEP$$anonfun$7$$anonfun$apply$4.apply(VEP.scala:172)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.utils.richUtils.RichContextRDD$$anonfun$cleanupRegions$1$$anon$1.hasNext(RichContextRDD.scala:31)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.utils.richUtils.RichContextRDD$$anonfun$cleanupRegions$1$$anon$1.hasNext(RichContextRDD.scala:31)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:221)
at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:299)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:357)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:308)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:346)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:310)
at is.hail.sparkextras.ContextRDD.iterator(ContextRDD.scala:378)
at is.hail.sparkextras.RepartitionedOrderedRDD2$$anonfun$compute$1$$anonfun$apply$3.apply(RepartitionedOrderedRDD2.scala:60)
at is.hail.sparkextras.RepartitionedOrderedRDD2$$anonfun$compute$1$$anonfun$apply$3.apply(RepartitionedOrderedRDD2.scala:59)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
at scala.collection.Iterator$$anon$18.hasNext(Iterator.scala:762)
at scala.collection.Iterator$$anon$16.hasNext(Iterator.scala:598)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.io.RichContextRDDLong$$anonfun$boundary$extension$2$$anon$1.hasNext(RichContextRDDRegionValue.scala:188)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$1.hasNext(Iterator.scala:1002)
at is.hail.utils.richUtils.RichIterator$$anon$5.isValid(RichIterator.scala:30)
at is.hail.utils.StagingIterator.isValid(FlipbookIterator.scala:48)
at is.hail.utils.FlipbookIterator$$anon$9.setValue(FlipbookIterator.scala:331)
at is.hail.utils.FlipbookIterator$$anon$9.<init>(FlipbookIterator.scala:344)
at is.hail.utils.FlipbookIterator.leftJoinDistinct(FlipbookIterator.scala:323)
at is.hail.annotations.OrderedRVIterator.leftJoinDistinct(OrderedRVIterator.scala:53)
at is.hail.rvd.KeyedRVD$$anonfun$orderedLeftJoinDistinct$1.apply(KeyedRVD.scala:151)
at is.hail.rvd.KeyedRVD$$anonfun$orderedLeftJoinDistinct$1.apply(KeyedRVD.scala:148)
at is.hail.sparkextras.ContextRDD$$anonfun$czipPartitions$1$$anonfun$apply$24.apply(ContextRDD.scala:305)
at is.hail.sparkextras.ContextRDD$$anonfun$czipPartitions$1$$anonfun$apply$24.apply(ContextRDD.scala:305)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitions$1$$anonfun$apply$9$$anonfun$apply$10.apply(ContextRDD.scala:208)
at is.hail.sparkextras.ContextRDD$$anonfun$cmapPartitions$1$$anonfun$apply$9$$anonfun$apply$10.apply(ContextRDD.scala:208)
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.utils.richUtils.RichContextRDD$$anonfun$cleanupRegions$1$$anon$1.hasNext(RichContextRDD.scala:31)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.utils.package$.getIteratorSizeWithMaxN(package.scala:388)
at is.hail.rvd.RVD$$anonfun$13$$anonfun$apply$10.apply(RVD.scala:526)
at is.hail.rvd.RVD$$anonfun$13$$anonfun$apply$10.apply(RVD.scala:526)
at is.hail.sparkextras.ContextRDD$$anonfun$runJob$1.apply(ContextRDD.scala:355)
at is.hail.sparkextras.ContextRDD$$anonfun$runJob$1.apply(ContextRDD.scala:353)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2101)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:123)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)

Hail version: 0.2.46-6ef64c08b000
Error summary: IOException: error=2, No such file or directory

Help?

Originally you were on an older version. Yes, 0.2.46 is the latest version. It says you don't have VEP installed on the cluster, which is weird because --vep GRCh37 should install it.

What's the current hailctl command you're running to start a cluster? And what gcloud command does it print when you run it? You can see what gcloud command you'd get without starting a cluster by using the --dry-run argument.
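
For example, the following prints the gcloud invocation hailctl would generate without actually creating a cluster:

# Show the generated gcloud command only; no cluster is started
hailctl dataproc start hailpy1 --vep GRCh37 --dry-run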

My guess is the problem stems from you using --initialization-actions instead of --init.
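
That is, keep everything else from the original command but pass the script through hailctl's own flag, roughly:

# Sketch: same cluster-start command, with the init script handed to
# hailctl's --init flag instead of a pass-through --initialization-actions
hailctl dataproc --beta start hailpy1 \
    --vep GRCh37 \
    --optional-components=ANACONDA,JUPYTER \
    --enable-component-gateway \
    --bucket analysts \
    --project analysts \
    --region us-central1 \
    --num-preemptible-workers 20 \
    --init 'gs://mybucket/init_notebook.py'

The idea is that hailctl combines --init scripts with the initialization actions it adds itself (including the VEP installer), rather than a raw gcloud flag taking their place.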

MY COMMAND:
hailctl dataproc --beta start hailpy9 --vep GRCh37 --optional-components=ANACONDA,JUPYTER --enable-component-gateway --bucket cncd_analysts --project cncd-analysts --region us-central1 --num-preemptible-workers 20 --initialization-actions 'gs://cncd_analysts/hail-bucket/init_notebook.py'

GCLOUD COMMAND THAT PRINTS OUT:
Your active configuration is: [cloudshell-19775]
gcloud beta dataproc clusters create
hailpy9
--image-version=1.4-debian9
--properties=spark:spark.driver.maxResultSize=0,spark:spark.task.maxFailures=20,spark:spark.kryoserializer.buffer.max=1g,spark:spark.driver.extraJavaOptions=-Xss4M,spark:spark.executor.extraJavaOptions=-Xss4M,hdfs:dfs.replication=1,dataproc:dataproc.logging.stackdriver.enable=false,dataproc:dataproc.monitoring.stackdriver.enable=false,spark:spark.driver.memory=41g
--initialization-actions=gs://hail-common/hailctl/dataproc/0.2.23/init_notebook.py,gs://hail-common/hailctl/dataproc/0.2.23/vep-GRCh37.sh
--metadata=^|||^WHEEL=gs://hail-common/hailctl/dataproc/0.2.23/hail-0.2.23-py3-none-any.whl|||PKGS=aiohttp>=3.6,<3.7|aiohttp_session>=2.7,<2.8|bokeh>1.1,<1.3|decorator<5|gcsfs==0.2.1|hurry.filesize==0.9|ipykernel<5|nest_asyncio|numpy<2|pandas>0.22,<0.24|parsimonious<0.9|PyJWT|python-json-logger==0.1.11|requests>=2.21.0,<2.21.1|scipy>1.2,<1.4|tabulate==0.8.3
--master-machine-type=n1-highmem-8
--master-boot-disk-size=100GB
--num-master-local-ssds=0
--num-preemptible-workers=20
--num-worker-local-ssds=0
--num-workers=2
WARNING: The --num-preemptible-workers flag is deprecated. Use the --num-secondary-workers flag instead.
WARNING: The --preemptible-worker-boot-disk-size flag is deprecated. Use the --secondary-worker-boot-disk-size flag instead.

See the solution here: I cannot start a Hail cluster when using the optional-components Dataproc flag
