Hello,

I am running the following code:
```python
from pyspark.sql import SparkSession
import hail as hl

# Start a Spark session with Hive support and attach Hail to it
builder = (
    SparkSession
    .builder
    .enableHiveSupport()
)
spark = builder.getOrCreate()
hl.init(sc=spark.sparkContext, driver_cores=16, worker_memory='highmem')

# Import the pVCFs as a single Hail MatrixTable
VCFs_path = "file:///mnt/project/Bulk/Exome sequences/Population level exome OQFE variants, pVCF format - final release/*.vcf.gz"
mt = hl.import_vcf(VCFs_path,
                   force_bgz=True,
                   reference_genome="GRCh38",
                   array_elements_required=False)

# Create a DNAnexus-backed database to hold the MatrixTable
db_name = "ukb_project"
mt_name = "pVCF.mt"

stmt = f"CREATE DATABASE IF NOT EXISTS {db_name} LOCATION 'dnax://'"
print(stmt)
spark.sql(stmt).show()

# Resolve the database URI and write the MatrixTable into it
import dxpy

db_uri = dxpy.find_one_data_object(name=f"{db_name}", classname="database")['id']
url = f"dnax://{db_uri}/{mt_name}"
mt.write(url)
```

I am using Java version 11.0.13 and Hail version 0.2.132-678e1f52b999. When I run the last line, `mt.write(url)`, I get the following error:

```
---------------------------------------------------------------------------
FatalError Traceback (most recent call last)
Cell In[10], line 1
----> 1 mt.write(url)
File :2, in write(self, output, overwrite, stage_locally, _codec_spec, _partitions)
File /opt/conda/lib/python3.11/site-packages/hail/typecheck/check.py:585, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
582 @decorator
583 def wrapper(__original_func: Callable[..., T], *args, **kwargs) -> T:
584 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585 return __original_func(*args_, **kwargs_)
File /opt/conda/lib/python3.11/site-packages/hail/matrixtable.py:2810, in MatrixTable.write(self, output, overwrite, stage_locally, _codec_spec, _partitions)
2807 _partitions_type = None
2809 writer = ir.MatrixNativeWriter(output, overwrite, stage_locally, _codec_spec, _partitions, _partitions_type)
--> 2810 Env.backend().execute(ir.MatrixWrite(self._mir, writer))
File /opt/conda/lib/python3.11/site-packages/hail/backend/spark_backend.py:217, in SparkBackend.execute(self, ir, timed)
214 except Exception as fatal:
215 raise err from fatal
--> 217 raise err
File /opt/conda/lib/python3.11/site-packages/hail/backend/spark_backend.py:209, in SparkBackend.execute(self, ir, timed)
207 def execute(self, ir: BaseIR, timed: bool = False) -> Any:
208 try:
--> 209 return super().execute(ir, timed)
210 except Exception as err:
211 if self._copy_log_on_error:
File /opt/conda/lib/python3.11/site-packages/hail/backend/backend.py:181, in Backend.execute(self, ir, timed)
179 result, timings = self._rpc(ActionTag.EXECUTE, payload)
180 except FatalError as e:
--> 181 raise e.maybe_user_error(ir) from None
182 if ir.typ == tvoid:
183 value = None
File /opt/conda/lib/python3.11/site-packages/hail/backend/backend.py:179, in Backend.execute(self, ir, timed)
177 payload = ExecutePayload(self._render_ir(ir), '{"name":"StreamBufferSpec"}', timed)
178 try:
--> 179 result, timings = self._rpc(ActionTag.EXECUTE, payload)
180 except FatalError as e:
181 raise e.maybe_user_error(ir) from None
File /opt/conda/lib/python3.11/site-packages/hail/backend/py4j_backend.py:221, in Py4JBackend._rpc(self, action, payload)
219 if resp.status_code >= 400:
220 error_json = orjson.loads(resp.content)
--> 221 raise fatal_error_from_java_error_triplet(
222 error_json['short'], error_json['expanded'], error_json['error_id']
223 )
224 return resp.content, resp.headers.get('X-Hail-Timings', '')
FatalError: ClassNotFoundException: is.hail.backend.spark.SparkBackend$$anon$5$RDDPartition
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 2307 in stage 2.0 failed 4 times, most recent failure: Lost task 2307.4 in stage 2.0 (TID 4610) (ip-10-60-49-147.eu-west-2.compute.internal executor 17): java.lang.ClassNotFoundException: is.hail.backend.spark.SparkBackend$$anon$5$RDDPartition
at java.base/java.net.URLClassLoader.findClass(URLClassLoader.java:476)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:594)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:527)
at java.base/java.lang.Class.forName0(Native Method)
at java.base/java.lang.Class.forName(Class.java:398)
at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:71)
at java.base/java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:2003)
at java.base/java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1870)
at java.base/java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2201)
at java.base/java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1687)
at java.base/java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2496)
at java.base/java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2390)
at java.base/java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2228)
at java.base/java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1687)
at java.base/java.io.ObjectInputStream.readObject(ObjectInputStream.java:489)
at java.base/java.io.ObjectInputStream.readObject(ObjectInputStream.java:447)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:87)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:129)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:579)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2956)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2886)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2885)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2885)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1341)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1341)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1341)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3161)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3095)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3084)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1073)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2393)
at is.hail.backend.spark.SparkBackend.$anonfun$parallelizeAndComputeWithIndex$4(SparkBackend.scala:455)
at is.hail.backend.spark.SparkBackend.$anonfun$parallelizeAndComputeWithIndex$4$adapted(SparkBackend.scala:454)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at is.hail.backend.spark.SparkBackend.parallelizeAndComputeWithIndex(SparkBackend.scala:454)
at is.hail.backend.BackendUtils.collectDArray(BackendUtils.scala:85)
at __C5Compiled.apply(Emit.scala)
at is.hail.expr.ir.LoweredTableReader$.$anonfun$makeCoercer$15(TableIR.scala:402)
at is.hail.backend.ExecuteContext.$anonfun$scopedExecution$1(ExecuteContext.scala:144)
at is.hail.utils.package$.using(package.scala:673)
at is.hail.backend.ExecuteContext.scopedExecution(ExecuteContext.scala:144)
at is.hail.expr.ir.LoweredTableReader$.makeCoercer(TableIR.scala:401)
at is.hail.expr.ir.GenericTableValue.getLTVCoercer(GenericTableValue.scala:187)
at is.hail.expr.ir.GenericTableValue.toTableStage(GenericTableValue.scala:220)
at is.hail.io.vcf.MatrixVCFReader.lower(LoadVCF.scala:2140)
at is.hail.expr.ir.TableReader.lower(TableIR.scala:610)
at is.hail.expr.ir.lowering.LowerTableIR$.applyTable(LowerTableIR.scala:1062)
at is.hail.expr.ir.lowering.LowerTableIR$.lower$1(LowerTableIR.scala:728)
at is.hail.expr.ir.lowering.LowerTableIR$.apply(LowerTableIR.scala:1021)
at is.hail.expr.ir.lowering.LowerToCDA$.lower(LowerToCDA.scala:27)
at is.hail.expr.ir.lowering.LowerToCDA$.apply(LowerToCDA.scala:11)
at is.hail.expr.ir.lowering.LowerToDistributedArrayPass.transform(LoweringPass.scala:91)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.evaluate$1(LowerOrInterpretNonCompilable.scala:27)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.rewrite$1(LowerOrInterpretNonCompilable.scala:59)
at is.hail.expr.ir.LowerOrInterpretNonCompilable$.apply(LowerOrInterpretNonCompilable.scala:64)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.transform(LoweringPass.scala:83)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$3(LoweringPass.scala:32)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:84)
at is.hail.expr.ir.lowering.LoweringPass.$anonfun$apply$1(LoweringPass.scala:32)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:84)
at is.hail.expr.ir.lowering.LoweringPass.apply(LoweringPass.scala:30)
at is.hail.expr.ir.lowering.LoweringPass.apply$(LoweringPass.scala:29)
at is.hail.expr.ir.lowering.LowerOrInterpretNonCompilablePass$.apply(LoweringPass.scala:78)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1(LoweringPipeline.scala:21)
at is.hail.expr.ir.lowering.LoweringPipeline.$anonfun$apply$1$adapted(LoweringPipeline.scala:19)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:19)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:45)
at is.hail.backend.spark.SparkBackend._execute(SparkBackend.scala:578)
at is.hail.backend.spark.SparkBackend.$anonfun$execute$4(SparkBackend.scala:614)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:84)
at is.hail.backend.spark.SparkBackend.$anonfun$execute$3(SparkBackend.scala:609)
at is.hail.backend.spark.SparkBackend.$anonfun$execute$3$adapted(SparkBackend.scala:608)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$3(ExecuteContext.scala:78)
at is.hail.utils.package$.using(package.scala:673)
at is.hail.backend.ExecuteContext$.$anonfun$scoped$2(ExecuteContext.scala:78)
at is.hail.utils.package$.using(package.scala:673)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:13)
at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:65)
at is.hail.backend.spark.SparkBackend.$anonfun$withExecuteContext$2(SparkBackend.scala:411)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:55)
at is.hail.utils.ExecutionTimer$.logTime(ExecutionTimer.scala:62)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:397)
at is.hail.backend.spark.SparkBackend.execute(SparkBackend.scala:608)
at is.hail.backend.BackendHttpHandler.handle(BackendServer.scala:88)
at jdk.httpserver/com.sun.net.httpserver.Filter$Chain.doFilter(Filter.java:77)
at jdk.httpserver/sun.net.httpserver.AuthFilter.doFilter(AuthFilter.java:82)
at jdk.httpserver/com.sun.net.httpserver.Filter$Chain.doFilter(Filter.java:80)
at jdk.httpserver/sun.net.httpserver.ServerImpl$Exchange$LinkHandler.handle(ServerImpl.java:725)
at jdk.httpserver/com.sun.net.httpserver.Filter$Chain.doFilter(Filter.java:77)
at jdk.httpserver/sun.net.httpserver.ServerImpl$Exchange.run(ServerImpl.java:694)
at jdk.httpserver/sun.net.httpserver.ServerImpl$DefaultExecutor.execute(ServerImpl.java:159)
at jdk.httpserver/sun.net.httpserver.ServerImpl$Dispatcher.handle(ServerImpl.java:446)
at jdk.httpserver/sun.net.httpserver.ServerImpl$Dispatcher.run(ServerImpl.java:412)
at java.base/java.lang.Thread.run(Thread.java:829)
java.lang.ClassNotFoundException: is.hail.backend.spark.SparkBackend$$anon$5$RDDPartition
at java.base/java.net.URLClassLoader.findClass(URLClassLoader.java:476)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:594)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:527)
at java.base/java.lang.Class.forName0(Native Method)
at java.base/java.lang.Class.forName(Class.java:398)
at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:71)
at java.base/java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:2003)
at java.base/java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1870)
at java.base/java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2201)
at java.base/java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1687)
at java.base/java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2496)
at java.base/java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2390)
at java.base/java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2228)
at java.base/java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1687)
at java.base/java.io.ObjectInputStream.readObject(ObjectInputStream.java:489)
at java.base/java.io.ObjectInputStream.readObject(ObjectInputStream.java:447)
at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:87)
at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:129)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:579)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Hail version: 0.2.132-678e1f52b999
Error summary: ClassNotFoundException: is.hail.backend.spark.SparkBackend$$anon$5$RDDPartition
```

Can someone help me?
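In case it is useful for diagnosing, this is how I can check the versions above and which JARs are attached to the Spark session from the same notebook. This is just a generic sketch; the `spark.jars` key may be unset depending on how the cluster was launched:

```python
import subprocess

from pyspark.sql import SparkSession
import hail as hl

spark = SparkSession.builder.getOrCreate()

print(hl.version())   # Hail version string, e.g. 0.2.132-678e1f52b999
print(spark.version)  # Spark version of the running session

# JARs explicitly registered with the Spark session; the Hail backend JAR
# would need to reach the executors for Hail's RDD tasks to deserialize
# (key may be unset on a managed cluster).
print(spark.sparkContext.getConf().get("spark.jars", "(not set)"))

# "java -version" writes to stderr, hence .stderr
print(subprocess.run(["java", "-version"], capture_output=True, text=True).stderr)
```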