Hi,
I’m working on a variant dataset (~200M variants, 2000 patients) and trying to use the hl.plot.qq function. I can run it without any issue on a subset of my dataset (filtering by samples and variants, so that it runs on only ~300 samples and 1 chromosome). When I try it on my whole dataset, I get this error:
FatalError Traceback (most recent call last)
in ()
----> 1 p = hl.plot.qq(gwas.logreg.p_value)
2 show(p)
/home/hail/hail.zip/hail/typecheck/check.py in wrapper(*args, **kwargs)
545 def wrapper(*args, **kwargs):
546 args_, kwargs_ = check_all(f, args, kwargs, checkers, is_method=is_method)
--> 547 return f(*args_, **kwargs_)
548
549 update_wrapper(wrapper, f)
/home/hail/hail.zip/hail/plot/plots.py in qq(pvals)
119 if isinstance(pvals, Expression):
120 if pvals._indices.source is not None:
--> 121 pvals = pvals.collect()
122 else:
123 return ValueError('Invalid input')
/home/hail/hail.zip/hail/expr/expressions/base_expression.py in collect(self)
763 uid = Env.get_uid()
764 t = self._to_table(uid)
--> 765 return [r[uid] for r in t._select("collect", None, hl.struct(**{uid: t[uid]})).collect()]
766
767 @property
/home/hail/hail.zip/hail/table.py in collect(self)
1597 List of rows.
1598 """
-> 1599 return hl.tarray(self.row.dtype)._from_json(self._jt.collectJSON())
1600
1601 def describe(self):
/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:
/home/hail/hail.zip/hail/utils/java.py in deco(*args, **kwargs)
198 raise FatalError('%s\n\nJava stack trace:\n%s\n'
199 'Hail version: %s\n'
--> 200 'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
201 except pyspark.sql.utils.CapturedException as e:
202 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: OutOfMemoryError: null
Java stack trace:
java.lang.OutOfMemoryError: null
at java.lang.AbstractStringBuilder.hugeCapacity(AbstractStringBuilder.java:161)
at java.lang.AbstractStringBuilder.newCapacity(AbstractStringBuilder.java:155)
at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:125)
at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:596)
at java.lang.StringBuilder.append(StringBuilder.java:190)
at com.fasterxml.jackson.core.util.TextBuffer.contentsAsString(TextBuffer.java:351)
at com.fasterxml.jackson.core.io.SegmentedStringWriter.getAndClear(SegmentedStringWriter.java:83)
at com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:2933)
at org.json4s.jackson.JsonMethods$class.compact(JsonMethods.scala:34)
at org.json4s.jackson.JsonMethods$.compact(JsonMethods.scala:50)
at is.hail.table.Table.collectJSON(Table.scala:635)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.lang.Thread.run(Thread.java:748)
Hail version: devel-5b6647193859
Error summary: OutOfMemoryError: null
Is this an issue with the size of my dataset? Is the collect() call what runs out of memory?
Thanks,
Laura