Hello. I am very new to data science, and even newer to bioinformatics.
I am trying to use realized_relationship_matrix.
So, first I import the VCF file with ds = hl.import_vcf('75.vcf'), and then I run rrm = hl.realized_relationship_matrix(ds.GT).
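For reference, here is the full snippet I ran, written out as a minimal sketch (assuming a fresh session with default settings, so the reference genome defaults to GRCh37):

```python
import hail as hl

hl.init()  # default settings; the default reference genome is GRCh37

# import the VCF into a Hail MatrixTable
ds = hl.import_vcf('75.vcf')

# compute the realized relationship matrix from the genotype calls
rrm = hl.realized_relationship_matrix(ds.GT)
```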
But the output is the following error:
FatalError                                Traceback (most recent call last)
in <module>()
----> 1 rrm = hl.realized_relationship_matrix(ds.GT)

~\Anaconda3\lib\site-packages\hail\typecheck\check.py in wrapper(__original_func, *args, **kwargs)
    612 def wrapper(__original_func, *args, **kwargs):
    613     args, kwargs = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614     return __original_func(*args, **kwargs)
    615
    616 return wrapper

~\Anaconda3\lib\site-packages\hail\methods\statgen.py in realized_relationship_matrix(call_expr)
   1993
   1994     normalized_gt = hl.or_else((mt.__gt - mt.__mean_gt) / mt.__centered_length, 0.0)
--> 1995     bm = BlockMatrix.from_entry_expr(normalized_gt)
   1996
   1997     return (bm.T @ bm) / (bm.n_rows / bm.n_cols)

~\Anaconda3\lib\site-packages\hail\typecheck\check.py in wrapper(__original_func, *args, **kwargs)
    612 def wrapper(__original_func, *args, **kwargs):
    613     args, kwargs = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614     return __original_func(*args, **kwargs)
    615
    616 return wrapper

~\Anaconda3\lib\site-packages\hail\linalg\blockmatrix.py in from_entry_expr(cls, entry_expr, mean_impute, center, normalize, axis, block_size)
    408     path = new_temp_file()
    409     cls.write_from_entry_expr(entry_expr, path, overwrite=False, mean_impute=mean_impute,
--> 410                               center=center, normalize=normalize, axis=axis, block_size=block_size)
    411     return cls.read(path)
    412

~\Anaconda3\lib\site-packages\hail\typecheck\check.py in wrapper(__original_func, *args, **kwargs)
    612 def wrapper(__original_func, *args, **kwargs):
    613     args, kwargs = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 614     return __original_func(*args, **kwargs)
    615
    616 return wrapper

~\Anaconda3\lib\site-packages\hail\linalg\blockmatrix.py in write_from_entry_expr(entry_expr, path, overwrite, mean_impute, center, normalize, axis, block_size)
    716     else:
    717         field = Env.get_uid()
--> 718         mt.select_entries(**{field: entry_expr})._write_block_matrix(path, overwrite, field, block_size)
    719 else:
    720     mt = mt.select_entries(__x=entry_expr).unfilter_entries()

~\Anaconda3\lib\site-packages\hail\matrixtable.py in _write_block_matrix(self, path, overwrite, entry_field, block_size)
   4119     'overwrite': overwrite,
   4120     'entryField': entry_field,
--> 4121     'blockSize': block_size}))
   4122
   4123 def _calculate_new_partitions(self, n_partitions):

~\Anaconda3\lib\site-packages\hail\backend\py4j_backend.py in execute(self, ir, timed)
     96     raise HailUserError(message_and_trace) from None
     97
---> 98     raise e

~\Anaconda3\lib\site-packages\hail\backend\py4j_backend.py in execute(self, ir, timed)
     72     # print(self._hail_package.expr.ir.Pretty.apply(jir, True, -1))
     73     try:
---> 74         result = json.loads(self._jhc.backend().executeJSON(jir))
     75         value = ir.typ._from_json(result['value'])
     76         timings = result['timings']

~\Anaconda3\lib\site-packages\py4j\java_gateway.py in __call__(self, *args)
   1255     answer = self.gateway_client.send_command(command)
   1256     return_value = get_return_value(
--> 1257         answer, self.gateway_client, self.target_id, self.name)
   1258
   1259     for temp_arg in temp_args:

~\Anaconda3\lib\site-packages\hail\backend\py4j_backend.py in deco(*args, **kwargs)
     30     raise FatalError('%s\n\nJava stack trace:\n%s\n'
     31                      'Hail version: %s\n'
     32                      'Error summary: %s' % (deepest, full, hail.__version__, deepest), error_id) from None
     33 except pyspark.sql.utils.CapturedException as e:
     34     raise FatalError('%s\n\nJava stack trace:\n%s\n'

FatalError: HailException: Invalid locus 'chr13:32911888' found. Contig 'chr13' is not in the reference genome 'GRCh37'.
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 43.0 failed 1 times, most recent failure: Lost task 0.0 in stage 43.0 (TID 146, localhost, executor driver): is.hail.utils.HailException: file:/C:/Users/m-a-b/75.vcf:offset 4730: error while parsing line
chr13 32911888 rs1801406 A G 100 PASS DP=556;clinvar=1|benign,1|benign,1|benign,1|benign,1|benign,1|benign;cosmic=1|COSM4415775,1|COSM4415776;GMAF=G|0.2668;AA=A;AF1000G=0.266773;phyloP=-0.265;CSQT=1|BRCA2|NM_000059.3|synonymous_variant GT:GQ:AD:DP:VF:NL:SB:NC 0/1:100:278,278:556:0.500:20:-100.0000:0.0000
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:15)
at is.hail.utils.package$.fatal(package.scala:77)
at is.hail.io.vcf.MatrixVCFReader$$anonfun$21$$anonfun$apply$10$$anonfun$apply$11.apply(LoadVCF.scala:1745)
at is.hail.io.vcf.MatrixVCFReader$$anonfun$21$$anonfun$apply$10$$anonfun$apply$11.apply(LoadVCF.scala:1734)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:464)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.rvd.RVD$$anonfun$33.apply(RVD.scala:1219)
at is.hail.rvd.RVD$$anonfun$33.apply(RVD.scala:1218)
at is.hail.sparkextras.ContextRDD$$anonfun$crunJobWithIndex$1.apply(ContextRDD.scala:232)
at is.hail.sparkextras.ContextRDD$$anonfun$crunJobWithIndex$1.apply(ContextRDD.scala:230)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:403)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:409)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
Caused by: is.hail.utils.HailException: Invalid locus 'chr13:32911888' found. Contig 'chr13' is not in the reference genome 'GRCh37'.
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:11)
at is.hail.utils.package$.fatal(package.scala:77)
at is.hail.variant.ReferenceGenome.checkLocus(ReferenceGenome.scala:206)
at is.hail.io.vcf.VCFLine$$anonfun$parseAddVariant$2.apply(LoadVCF.scala:358)
at is.hail.io.vcf.VCFLine$$anonfun$parseAddVariant$2.apply(LoadVCF.scala:358)
at scala.Option.foreach(Option.scala:257)
at is.hail.io.vcf.VCFLine.parseAddVariant(LoadVCF.scala:358)
at is.hail.io.vcf.LoadVCF$.parseLine(LoadVCF.scala:1300)
at is.hail.io.vcf.MatrixVCFReader$$anonfun$21$$anonfun$apply$10$$anonfun$apply$11.apply(LoadVCF.scala:1741)
... 16 more
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2114)
at is.hail.sparkextras.ContextRDD.crunJobWithIndex(ContextRDD.scala:228)
at is.hail.rvd.RVD$.getKeyInfo(RVD.scala:1218)
at is.hail.rvd.RVD$.makeCoercer(RVD.scala:1293)
at is.hail.expr.ir.GenericTableValue.getRVDCoercer(GenericTableValue.scala:162)
at is.hail.expr.ir.GenericTableValue.toTableValue(GenericTableValue.scala:188)
at is.hail.io.vcf.MatrixVCFReader.apply(LoadVCF.scala:1773)
at is.hail.expr.ir.TableRead.execute(TableIR.scala:1100)
at is.hail.expr.ir.TableMapRows.execute(TableIR.scala:1845)
at is.hail.expr.ir.TableFilter.execute(TableIR.scala:1279)
at is.hail.expr.ir.TableMapRows.execute(TableIR.scala:1845)
at is.hail.expr.ir.Interpret$.run(Interpret.scala:831)
at is.hail.expr.ir.Interpret$.alreadyLowered(Interpret.scala:53)
at is.hail.expr.ir.InterpretNonCompilable$.interpretAndCoerce$1(InterpretNonCompilable.scala:16)
at is.hail.expr.ir.InterpretNonCompilable$.is$hail$expr$ir$InterpretNonCompilable$$rewrite$1(InterpretNonCompilable.scala:53)
at is.hail.expr.ir.InterpretNonCompilable$.apply(InterpretNonCompilable.scala:58)
at is.hail.expr.ir.lowering.InterpretNonCompilablePass$.transform(LoweringPass.scala:67)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:15)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3$$anonfun$1.apply(LoweringPass.scala:15)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:15)
at is.hail.expr.ir.lowering.LoweringPass$$anonfun$apply$3.apply(LoweringPass.scala:13)
at is.hail.utils.ExecutionTimer.time(ExecutionTimer.scala:81)
at is.hail.expr.ir.lowering.LoweringPass$class.apply(LoweringPass.scala:13)
at is.hail.expr.ir.lowering.InterpretNonCompilablePass$.apply(LoweringPass.scala:62)
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:14)
at is.hail.expr.ir.lowering.LoweringPipeline$$anonfun$apply$1.apply(LoweringPipeline.scala:12)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
at is.hail.expr.ir.lowering.LoweringPipeline.apply(LoweringPipeline.scala:12)
at is.hail.expr.ir.CompileAndEvaluate$._apply(CompileAndEvaluate.scala:28)
at is.hail.backend.spark.SparkBackend.is$hail$backend$spark$SparkBackend$$_execute(SparkBackend.scala:354)
at is.hail.backend.spark.SparkBackend$$anonfun$execute$1.apply(SparkBackend.scala:338)
at is.hail.backend.spark.SparkBackend$$anonfun$execute$1.apply(SparkBackend.scala:335)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:25)
at is.hail.expr.ir.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:23)
at is.hail.utils.package$.using(package.scala:618)
at is.hail.annotations.Region$.scoped(Region.scala:18)
at is.hail.expr.ir.ExecuteContext$.scoped(ExecuteContext.scala:23)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:247)
at is.hail.backend.spark.SparkBackend.execute(SparkBackend.scala:335)
at is.hail.backend.spark.SparkBackend$$anonfun$7.apply(SparkBackend.scala:379)
at is.hail.backend.spark.SparkBackend$$anonfun$7.apply(SparkBackend.scala:377)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.backend.spark.SparkBackend.executeJSON(SparkBackend.scala:377)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Unknown Source)
is.hail.utils.HailException: file:/C:/Users/m-a-b/75.vcf:offset 4730: error while parsing line
chr13 32911888 rs1801406 A G 100 PASS DP=556;clinvar=1|benign,1|benign,1|benign,1|benign,1|benign,1|benign;cosmic=1|COSM4415775,1|COSM4415776;GMAF=G|0.2668;AA=A;AF1000G=0.266773;phyloP=-0.265;CSQT=1|BRCA2|NM_000059.3|synonymous_variant GT:GQ:AD:DP:VF:NL:SB:NC 0/1:100:278,278:556:0.500:20:-100.0000:0.0000
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:15)
at is.hail.utils.package$.fatal(package.scala:77)
at is.hail.io.vcf.MatrixVCFReader$$anonfun$21$$anonfun$apply$10$$anonfun$apply$11.apply(LoadVCF.scala:1745)
at is.hail.io.vcf.MatrixVCFReader$$anonfun$21$$anonfun$apply$10$$anonfun$apply$11.apply(LoadVCF.scala:1734)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:464)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.rvd.RVD$$anonfun$33.apply(RVD.scala:1219)
at is.hail.rvd.RVD$$anonfun$33.apply(RVD.scala:1218)
at is.hail.sparkextras.ContextRDD$$anonfun$crunJobWithIndex$1.apply(ContextRDD.scala:232)
at is.hail.sparkextras.ContextRDD$$anonfun$crunJobWithIndex$1.apply(ContextRDD.scala:230)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:403)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:409)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)
is.hail.utils.HailException: Invalid locus 'chr13:32911888' found. Contig 'chr13' is not in the reference genome 'GRCh37'.
at is.hail.utils.ErrorHandling$class.fatal(ErrorHandling.scala:11)
at is.hail.utils.package$.fatal(package.scala:77)
at is.hail.variant.ReferenceGenome.checkLocus(ReferenceGenome.scala:206)
at is.hail.io.vcf.VCFLine$$anonfun$parseAddVariant$2.apply(LoadVCF.scala:358)
at is.hail.io.vcf.VCFLine$$anonfun$parseAddVariant$2.apply(LoadVCF.scala:358)
at scala.Option.foreach(Option.scala:257)
at is.hail.io.vcf.VCFLine.parseAddVariant(LoadVCF.scala:358)
at is.hail.io.vcf.LoadVCF$.parseLine(LoadVCF.scala:1300)
at is.hail.io.vcf.MatrixVCFReader$$anonfun$21$$anonfun$apply$10$$anonfun$apply$11.apply(LoadVCF.scala:1741)
at is.hail.io.vcf.MatrixVCFReader$$anonfun$21$$anonfun$apply$10$$anonfun$apply$11.apply(LoadVCF.scala:1734)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:464)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
at is.hail.rvd.RVD$$anonfun$33.apply(RVD.scala:1219)
at is.hail.rvd.RVD$$anonfun$33.apply(RVD.scala:1218)
at is.hail.sparkextras.ContextRDD$$anonfun$crunJobWithIndex$1.apply(ContextRDD.scala:232)
at is.hail.sparkextras.ContextRDD$$anonfun$crunJobWithIndex$1.apply(ContextRDD.scala:230)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:403)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:409)
at java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown Source)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown Source)
at java.lang.Thread.run(Unknown Source)

Hail version: 0.2.60-de1845e1c2f6
Error summary: HailException: Invalid locus 'chr13:32911888' found. Contig 'chr13' is not in the reference genome 'GRCh37'.
So what is the problem, and how can I solve it?
Thank you a lot.