@tpoterba thank you for looking into this!
It is an insanely long error, and since I couldn't find a way to attach a file, I've posted a link to Google Drive where you can find the full error message:
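For context, judging by the traceback the failure is hit the moment the MatrixTable is displayed. A minimal sketch of what triggers it (the path is copied from the error message; loading the dataset with read_matrix_table is my assumption about how it was opened):

import hail as hl

hl.init()

# Path taken verbatim from the error; the read call is an assumption
# about how the dataset was loaded.
mt = hl.read_matrix_table('/lustre/path/to/data/dataset.mt')

# Displaying the table is what walks the traceback below:
# _Show.__repr__ -> Table.take -> Table.collect -> backend.execute,
# which then fails on a worker while opening
# index/part-00000-.../metadata.json.gz.
mt.show()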
Here are just the first 411 lines of the error message:
FatalError Traceback (most recent call last)
File ~/venv/lib/python3.8/site-packages/IPython/core/formatters.py:706, in PlainTextFormatter.__call__(self, obj)
699 stream = StringIO()
700 printer = pretty.RepresentationPrinter(stream, self.verbose,
701 self.max_width, self.newline,
702 max_seq_length=self.max_seq_length,
703 singleton_pprinters=self.singleton_printers,
704 type_pprinters=self.type_printers,
705 deferred_pprinters=self.deferred_printers)
--> 706 printer.pretty(obj)
707 printer.flush()
708 return stream.getvalue()
File ~/venv/lib/python3.8/site-packages/IPython/lib/pretty.py:410, in RepresentationPrinter.pretty(self, obj)
407 return meth(obj, self, cycle)
408 if cls is not object \
409 and callable(cls.__dict__.get('__repr__')):
--> 410 return _repr_pprint(obj, self, cycle)
412 return _default_pprint(obj, self, cycle)
413 finally:
File ~/venv/lib/python3.8/site-packages/IPython/lib/pretty.py:778, in _repr_pprint(obj, p, cycle)
776 """A pprint that just redirects to the normal repr function."""
777 # Find newlines and replace them with p.break_()
--> 778 output = repr(obj)
779 lines = output.splitlines()
780 with p.group():
File ~/venv/lib/python3.8/site-packages/hail/matrixtable.py:2599, in MatrixTable._Show.__repr__(self)
2598 def __repr__(self):
-> 2599 return self.__str__()
File ~/venv/lib/python3.8/site-packages/hail/matrixtable.py:2593, in MatrixTable._Show.__str__(self)
2592 def __str__(self):
-> 2593 s = self.table_show.__str__()
2594 if self.displayed_n_cols != self.actual_n_cols:
2595 s += f"showing the first { self.displayed_n_cols } of { self.actual_n_cols } columns"
File ~/venv/lib/python3.8/site-packages/hail/table.py:1489, in Table._Show.__str__(self)
1488 def __str__(self):
-> 1489 return self._ascii_str()
File ~/venv/lib/python3.8/site-packages/hail/table.py:1515, in Table._Show._ascii_str(self)
1512 return s[:truncate - 3] + "..."
1513 return s
-> 1515 rows, has_more, dtype = self.data()
1516 fields = list(dtype)
1517 trunc_fields = [trunc(f) for f in fields]
File ~/venv/lib/python3.8/site-packages/hail/table.py:1499, in Table._Show.data(self)
1497 row_dtype = t.row.dtype
1498 t = t.select(**{k: hl._showstr(v) for (k, v) in t.row.items()})
-> 1499 rows, has_more = t._take_n(self.n)
1500 self._data = (rows, has_more, row_dtype)
1501 return self._data
File ~/venv/lib/python3.8/site-packages/hail/table.py:1646, in Table._take_n(self, n)
1644 has_more = False
1645 else:
-> 1646 rows = self.take(n + 1)
1647 has_more = len(rows) > n
1648 rows = rows[:n]
File <decorator-gen-1094>:2, in take(self, n, _localize)
File ~/venv/lib/python3.8/site-packages/hail/typecheck/check.py:577, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
574 @decorator
575 def wrapper(__original_func, *args, **kwargs):
576 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 577 return __original_func(*args_, **kwargs_)
File ~/venv/lib/python3.8/site-packages/hail/table.py:2319, in Table.take(self, n, _localize)
2285 @typecheck_method(n=int, _localize=bool)
2286 def take(self, n, _localize=True):
2287 """Collect the first `n` rows of the table into a local list.
2288
2289 Examples
(...)
2316 List of row structs.
2317 """
-> 2319 return self.head(n).collect(_localize)
File <decorator-gen-1088>:2, in collect(self, _localize, _timed)
File ~/venv/lib/python3.8/site-packages/hail/typecheck/check.py:577, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
574 @decorator
575 def wrapper(__original_func, *args, **kwargs):
576 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 577 return __original_func(*args_, **kwargs_)
File ~/venv/lib/python3.8/site-packages/hail/table.py:2118, in Table.collect(self, _localize, _timed)
2116 e = construct_expr(rows_ir, hl.tarray(t.row.dtype))
2117 if _localize:
-> 2118 return Env.backend().execute(e._ir, timed=_timed)
2119 else:
2120 return e
File ~/venv/lib/python3.8/site-packages/hail/backend/py4j_backend.py:104, in Py4JBackend.execute(self, ir, timed)
102 return (value, timings) if timed else value
103 except FatalError as e:
--> 104 self._handle_fatal_error_from_backend(e, ir)
File ~/venv/lib/python3.8/site-packages/hail/backend/backend.py:181, in Backend._handle_fatal_error_from_backend(self, err, ir)
179 error_sources = ir.base_search(lambda x: x._error_id == err._error_id)
180 if len(error_sources) == 0:
--> 181 raise err
183 better_stack_trace = error_sources[0]._stack_trace
184 error_message = str(err)
File ~/venv/lib/python3.8/site-packages/hail/backend/py4j_backend.py:98, in Py4JBackend.execute(self, ir, timed)
96 # print(self._hail_package.expr.ir.Pretty.apply(jir, True, -1))
97 try:
---> 98 result_tuple = self._jbackend.executeEncode(jir, stream_codec, timed)
99 (result, timings) = (result_tuple._1(), result_tuple._2())
100 value = ir.typ._from_encoding(result)
File ~/venv/lib/python3.8/site-packages/py4j/java_gateway.py:1304, in JavaMember.__call__(self, *args)
1298 command = proto.CALL_COMMAND_NAME +\
1299 self.command_header +\
1300 args_command +\
1301 proto.END_COMMAND_PART
1303 answer = self.gateway_client.send_command(command)
-> 1304 return_value = get_return_value(
1305 answer, self.gateway_client, self.target_id, self.name)
1307 for temp_arg in temp_args:
1308 temp_arg._detach()
File ~/venv/lib/python3.8/site-packages/hail/backend/py4j_backend.py:31, in handle_java_exception.<locals>.deco(*args, **kwargs)
29 tpl = Env.jutils().handleForPython(e.java_exception)
30 deepest, full, error_id = tpl._1(), tpl._2(), tpl._3()
---> 31 raise fatal_error_from_java_error_triplet(deepest, full, error_id) from None
32 except pyspark.sql.utils.CapturedException as e:
33 raise FatalError('%s\n\nJava stack trace:\n%s\n'
34 'Hail version: %s\n'
35 'Error summary: %s' % (e.desc, e.stackTrace, hail.__version__, e.desc)) from None
FatalError: RemoteException: File does not exist: /lustre/path/to/data/dataset.mt/index/part-00000-0f98f82e-3293-40fd-8fb4-d41990c8ac58.idx/metadata.json.gz
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:86)
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
at org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:156)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2071)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:773)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:458)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:600)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:568)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:552)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1093)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1035)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:963)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2966)
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 14.0 failed 4 times, most recent failure: Lost task 0.3 in stage 14.0 (TID 27341) (192.168.252.189 executor 33): java.io.FileNotFoundException: File does not exist: /lustre/path/to/data/dataset.mt/index/part-00000-0f98f82e-3293-40fd-8fb4-d41990c8ac58.idx/metadata.json.gz
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:86)
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
at org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:156)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2071)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:773)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:458)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:600)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:568)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:552)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1093)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1035)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:963)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2966)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:121)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:88)
at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:894)
at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:881)
at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:870)
at org.apache.hadoop.hdfs.DFSClient.open(DFSClient.java:1038)
at org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:339)
at org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:335)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:352)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:976)
at is.hail.io.fs.HadoopFS.openNoCompression(HadoopFS.scala:91)
at is.hail.io.fs.FS.open(FS.scala:354)
at is.hail.io.fs.FS.open$(FS.scala:353)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:72)
at is.hail.io.fs.FS.open(FS.scala:366)
at is.hail.io.fs.FS.open$(FS.scala:365)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:72)
at is.hail.io.fs.FS.open(FS.scala:363)
at is.hail.io.fs.FS.open$(FS.scala:362)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:72)
at is.hail.io.index.IndexReader$.readUntyped(IndexReader.scala:43)
at __C179collect_distributed_array_table_head_recursive_count.__m198split_StreamLen(Unknown Source)
at __C179collect_distributed_array_table_head_recursive_count.apply(Unknown Source)
at __C179collect_distributed_array_table_head_recursive_count.apply(Unknown Source)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$4(BackendUtils.scala:48)
at is.hail.utils.package$.using(package.scala:635)
at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:162)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$3(BackendUtils.scala:47)
at is.hail.backend.spark.SparkBackendComputeRDD.compute(SparkBackend.scala:799)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:498)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:501)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Caused by: org.apache.hadoop.ipc.RemoteException(java.io.FileNotFoundException): File does not exist: /lustre/path/to/data/dataset.mt/index/part-00000-0f98f82e-3293-40fd-8fb4-d41990c8ac58.idx/metadata.json.gz
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:86)
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
at org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:156)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2071)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:773)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:458)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:600)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:568)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:552)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1093)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1035)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:963)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2966)
at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1573)
at org.apache.hadoop.ipc.Client.call(Client.java:1519)
at org.apache.hadoop.ipc.Client.call(Client.java:1416)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Invoker.invoke(ProtobufRpcEngine2.java:242)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Invoker.invoke(ProtobufRpcEngine2.java:129)
at com.sun.proxy.$Proxy19.getBlockLocations(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getBlockLocations(ClientNamenodeProtocolTranslatorPB.java:333)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:422)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:165)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:157)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:359)
at com.sun.proxy.$Proxy20.getBlockLocations(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:892)
... 37 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2303)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2252)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2251)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2251)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1124)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1124)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1124)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2490)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2432)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2421)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:902)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2196)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2217)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2236)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2261)
at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1030)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:414)
at org.apache.spark.rdd.RDD.collect(RDD.scala:1029)
at is.hail.backend.spark.SparkBackend.parallelizeAndComputeWithIndex(SparkBackend.scala:355)
at is.hail.backend.BackendUtils.collectDArray(BackendUtils.scala:43)
at __C126Compiled.__m163split_TailLoop_region4_7(Emit.scala)
at __C126Compiled.__m163split_TailLoop(Emit.scala)
at __C126Compiled.__m127split_ToArray(Emit.scala)
at __C126Compiled.apply(Emit.scala)
at is.hail.expr.ir.CompileAndEvaluate$.$anonfun$_apply$6(CompileAndEvaluate.scala:68)
at scala.runtime.java8.JFunction0$mcJ$sp.apply(JFunction0$mcJ$sp.java:23)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:68)
at is.hail.expr.ir.CompileAndEvaluate$.$anonfun$apply$1(CompileAndEvaluate.scala:19)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.CompileAndEvaluate$.apply(CompileAndEvaluate.scala:19)
at is.hail.expr.ir.lowering.LowerTableIR$.applyTable(LowerTableIR.scala:1093)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$2(LowerTableIR.scala:731)
at is.hail.expr.ir.lowering.LowerTableIR$.applyTable(LowerTableIR.scala:1216)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:493)
at is.hail.expr.ir.lowering.LowerTableIR$.apply(LowerTableIR.scala:572)
at is.hail.expr.ir.lowering.LowerToCDA$.lower(LowerToCDA.scala:73)
at is.hail.expr.ir.lowering.LowerToCDA$.apply(LowerToCDA.scala:18)
at is.hail.expr.ir.lowering.LowerToDistributedArrayPass.transform(LoweringPass.scala:77)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.evaluate$1(LowerOrInterpretNonCompilable.scala:27)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.rewrite$1(LowerOrInterpretNonCompilable.scala:67)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.rewrite$1(LowerOrInterpretNonCompilable.scala:53)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.apply(LowerOrInterpretNonCompilable.scala:72)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.transform(LoweringPass.scala:69)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$3(LoweringPass.scala:16)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$1(LoweringPass.scala:16)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass.apply(LoweringPass.scala:14)
at is.hail.expr.ir.lowering.LoweringPass.apply$(LoweringPass.scala:13)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.apply(LoweringPass.scala:64)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1(LoweringPipeline.scala:15)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1$adapted(LoweringPipeline.scala:13)
at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:38)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:13)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:47)
at is.hail.backend.spark.SparkBackend._execute(SparkBackend.scala:450)
at is.hail.backend.spark.SparkBackend.$anonfun$executeEncode$2(SparkBackend.scala:486)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:70)
at is.hail.utils.package$.using(package.scala:635)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:70)
at is.hail.utils.package$.using(package.scala:635)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:59)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:339)
at is.hail.backend.spark.SparkBackend.$anonfun$executeEncode$1(SparkBackend.scala:483)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.backend.spark.SparkBackend.executeEncode(SparkBackend.scala:482)
at sun.reflect.GeneratedMethodAccessor45.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:750)
java.io.FileNotFoundException: File does not exist: /lustre/path/to/data/dataset.mt/index/part-00000-0f98f82e-3293-40fd-8fb4-d41990c8ac58.idx/metadata.json.gz
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:86)
at org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:76)
at org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getBlockLocations(FSDirStatAndListingOp.java:156)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getBlockLocations(FSNamesystem.java:2071)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getBlockLocations(NameNodeRpcServer.java:773)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getBlockLocations(ClientNamenodeProtocolServerSideTranslatorPB.java:458)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:600)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:568)
at org.apache.hadoop.ipc.ProtobufRpcEngine2$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine2.java:552)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1093)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:1035)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:963)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1878)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2966)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:121)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:88)
at org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:894)
at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:881)
at org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:870)
at org.apache.hadoop.hdfs.DFSClient.open(DFSClient.java:1038)
at org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:339)
at org.apache.hadoop.hdfs.DistributedFileSystem$4.doCall(DistributedFileSystem.java:335)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.open(DistributedFileSystem.java:352)
at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:976)
at is.hail.io.fs.HadoopFS.openNoCompression(HadoopFS.scala:91)
at is.hail.io.fs.FS.open(FS.scala:354)
at is.hail.io.fs.FS.open$(FS.scala:353)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:72)
at is.hail.io.fs.FS.open(FS.scala:366)
at is.hail.io.fs.FS.open$(FS.scala:365)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:72)
at is.hail.io.fs.FS.open(FS.scala:363)
at is.hail.io.fs.FS.open$(FS.scala:362)
at is.hail.io.fs.HadoopFS.open(HadoopFS.scala:72)
at is.hail.io.index.IndexReader$.readUntyped(IndexReader.scala:43)
at __C179collect_distributed_array_table_head_recursive_count.__m198split_StreamLen(Unknown Source)
at __C179collect_distributed_array_table_head_recursive_count.apply(Unknown Source)
at __C179collect_distributed_array_table_head_recursive_count.apply(Unknown Source)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$4(BackendUtils.scala:48)
at is.hail.utils.package$.using(package.scala:635)
at is.hail.annotations.RegionPool.scopedRegion(RegionPool.scala:162)
at is.hail.backend.BackendUtils.$anonfun$collectDArray$3(BackendUtils.scala:47)
at is.hail.backend.spark.SparkBackendComputeRDD.compute(SparkBackend.scala:799)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:337)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:131)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:498)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:501)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)