Hi all,
I would like to process the UK Biobank 200K WGS data with Hail.
Unfortunately, I could not import the VCF file; the call fails with the error shown below.
How can I import these VCF files? I would appreciate any information about this.
Thank you so much.
vcf = 'file:///mnt/project/Bulk/Whole genome sequences/Population level WGS variants, pVCF format - interim 200k release/ukb24304_c2_b1_v1.vcf.gz'
mt = hl.import_vcf(vcf, force_bgz = True, reference_genome = 'GRCh38', find_replace = ('nul', '.'), array_elements_required = False)
FatalError Traceback (most recent call last)
<ipython-input-3-35fc70fb23bd> in <module>
6 find_replace = ('nul', '.'),
----> 7 array_elements_required = False
8 )
<decorator-gen-1475> in import_vcf(path, force, force_bgz, header_file, min_partitions, drop_samples, call_fields, reference_genome, contig_recoding, array_elements_required, skip_invalid_loci, entry_float_type, filter, find_replace, n_partitions, block_size, _partitions)
/opt/conda/lib/python3.6/site-packages/hail/typecheck/check.py in wrapper(__original_func, *args, **kwargs)
575 def wrapper(__original_func, *args, **kwargs):
576 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 577 return __original_func(*args_, **kwargs_)
578
579 return wrapper
/opt/conda/lib/python3.6/site-packages/hail/methods/impex.py in import_vcf(path, force, force_bgz, header_file, min_partitions, drop_samples, call_fields, reference_genome, contig_recoding, array_elements_required, skip_invalid_loci, entry_float_type, filter, find_replace, n_partitions, block_size, _partitions)
2370 skip_invalid_loci, force_bgz, force, filter, find_replace,
2371 _partitions)
-> 2372 return MatrixTable(ir.MatrixRead(reader, drop_cols=drop_samples))
2373
2374
/opt/conda/lib/python3.6/site-packages/hail/matrixtable.py in __init__(self, mir)
556 self._entry_indices = Indices(self, {self._row_axis, self._col_axis})
557
--> 558 self._type = self._mir.typ
559
560 self._global_type = self._type.global_type
/opt/conda/lib/python3.6/site-packages/hail/ir/base_ir.py in typ(self)
359 def typ(self):
360 if self._type is None:
--> 361 self._compute_type()
362 assert self._type is not None, self
363 return self._type
/opt/conda/lib/python3.6/site-packages/hail/ir/matrix_ir.py in _compute_type(self)
59
60 def _compute_type(self):
---> 61 self._type = Env.backend().matrix_type(self)
62
63
/opt/conda/lib/python3.6/site-packages/hail/backend/spark_backend.py in matrix_type(self, mir)
284
285 def matrix_type(self, mir):
--> 286 jir = self._to_java_matrix_ir(mir)
287 return tmatrix._from_java(jir.typ())
288
/opt/conda/lib/python3.6/site-packages/hail/backend/spark_backend.py in _to_java_matrix_ir(self, ir)
270
271 def _to_java_matrix_ir(self, ir):
--> 272 return self._to_java_ir(ir, self._parse_matrix_ir)
273
274 def _to_java_blockmatrix_ir(self, ir):
/opt/conda/lib/python3.6/site-packages/hail/backend/spark_backend.py in _to_java_ir(self, ir, parse)
260 r = CSERenderer(stop_at_jir=True)
261 # FIXME parse should be static
--> 262 ir._jir = parse(r(ir), ir_map=r.jirs)
263 return ir._jir
264
/opt/conda/lib/python3.6/site-packages/hail/backend/spark_backend.py in _parse_matrix_ir(self, code, ref_map, ir_map)
238
239 def _parse_matrix_ir(self, code, ref_map={}, ir_map={}):
--> 240 return self._jbackend.parse_matrix_ir(code, ref_map, ir_map)
241
242 def _parse_blockmatrix_ir(self, code, ref_map={}, ir_map={}):
/cluster/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
/opt/conda/lib/python3.6/site-packages/hail/backend/py4j_backend.py in deco(*args, **kwargs)
29 raise FatalError('%s\n\nJava stack trace:\n%s\n'
30 'Hail version: %s\n'
---> 31 'Error summary: %s' % (deepest, full, hail.__version__, deepest), error_id) from None
32 except pyspark.sql.utils.CapturedException as e:
33 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: StringIndexOutOfBoundsException: String index out of range: 0
Java stack trace:
java.lang.StringIndexOutOfBoundsException: String index out of range: 0
at java.lang.String.charAt(String.java:658)
at scala.collection.immutable.StringOps$.apply$extension(StringOps.scala:37)
at is.hail.io.vcf.LoadVCF$$anonfun$getHeaderLines$1$$anonfun$apply$5.apply(LoadVCF.scala:1293)
at is.hail.io.vcf.LoadVCF$$anonfun$getHeaderLines$1$$anonfun$apply$5.apply(LoadVCF.scala:1293)
at scala.collection.Iterator$$anon$16.hasNext(Iterator.scala:600)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
at scala.collection.Iterator$class.foreach(Iterator.scala:891)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334)
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
at scala.collection.AbstractIterator.to(Iterator.scala:1334)
at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1334)
at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
at scala.collection.AbstractIterator.toArray(Iterator.scala:1334)
at is.hail.io.vcf.LoadVCF$$anonfun$getHeaderLines$1.apply(LoadVCF.scala:1295)
at is.hail.io.vcf.LoadVCF$$anonfun$getHeaderLines$1.apply(LoadVCF.scala:1291)
at is.hail.io.fs.FS$$anonfun$readLines$1.apply(FS.scala:221)
at is.hail.io.fs.FS$$anonfun$readLines$1.apply(FS.scala:212)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.io.fs.FS$class.readLines(FS.scala:211)
at is.hail.io.fs.HadoopFS.readLines(HadoopFS.scala:70)
at is.hail.io.vcf.LoadVCF$.getHeaderLines(LoadVCF.scala:1291)
at is.hail.io.vcf.MatrixVCFReader$.apply(LoadVCF.scala:1573)
at is.hail.io.vcf.MatrixVCFReader$.fromJValue(LoadVCF.scala:1665)
at is.hail.expr.ir.MatrixReader$.fromJson(MatrixIR.scala:90)
at is.hail.expr.ir.IRParser$.matrix_ir_1(Parser.scala:1720)
at is.hail.expr.ir.IRParser$$anonfun$matrix_ir$1.apply(Parser.scala:1646)
at is.hail.expr.ir.IRParser$$anonfun$matrix_ir$1.apply(Parser.scala:1646)
at is.hail.utils.StackSafe$More.advance(StackSafe.scala:64)
at is.hail.utils.StackSafe$.run(StackSafe.scala:16)
at is.hail.utils.StackSafe$StackFrame.run(StackSafe.scala:32)
at is.hail.expr.ir.IRParser$$anonfun$parse_matrix_ir$1.apply(Parser.scala:1986)
at is.hail.expr.ir.IRParser$$anonfun$parse_matrix_ir$1.apply(Parser.scala:1986)
at is.hail.expr.ir.IRParser$.parse(Parser.scala:1973)
at is.hail.expr.ir.IRParser$.parse_matrix_ir(Parser.scala:1986)
at is.hail.backend.spark.SparkBackend$$anonfun$parse_matrix_ir$1$$anonfun$apply$22.apply(SparkBackend.scala:660)
at is.hail.backend.spark.SparkBackend$$anonfun$parse_matrix_ir$1$$anonfun$apply$22.apply(SparkBackend.scala:659)
at is.hail.backend.ExecuteContext$$anonfun$scoped$1$$anonfun$apply$1.apply(ExecuteContext.scala:47)
at is.hail.backend.ExecuteContext$$anonfun$scoped$1$$anonfun$apply$1.apply(ExecuteContext.scala:47)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.backend.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:47)
at is.hail.backend.ExecuteContext$$anonfun$scoped$1.apply(ExecuteContext.scala:46)
at is.hail.utils.package$.using(package.scala:638)
at is.hail.annotations.RegionPool$.scoped(RegionPool.scala:17)
at is.hail.backend.ExecuteContext$.scoped(ExecuteContext.scala:46)
at is.hail.backend.spark.SparkBackend.withExecuteContext(SparkBackend.scala:275)
at is.hail.backend.spark.SparkBackend$$anonfun$parse_matrix_ir$1.apply(SparkBackend.scala:659)
at is.hail.backend.spark.SparkBackend$$anonfun$parse_matrix_ir$1.apply(SparkBackend.scala:658)
at is.hail.utils.ExecutionTimer$.time(ExecutionTimer.scala:52)
at is.hail.utils.ExecutionTimer$.logTime(ExecutionTimer.scala:59)
at is.hail.backend.spark.SparkBackend.parse_matrix_ir(SparkBackend.scala:658)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Hail version: 0.2.78-b17627756568
Error summary: StringIndexOutOfBoundsException: String index out of range: 0