Dear Hail Team,
I am learning to use Hail to handle large genomic data. Thank you for providing such a wonderful tool to the community.
I am encountering a technical issue. While testing the _linear_skat function with moderately sized data (7944 SNPs and 442075 samples, 76.4 MB on disk) on my personal computer (MacBook Pro M3, 32 GB memory), I received ConnectionError messages. Hail appears to use an unreasonably large amount of memory. Below is the simplified code that I believe causes the problem. The issue occurs after combining mt.aggregate_cols(hl.agg.collect(…)) with hl.nd.array() on a one-column 2D array containing 442075 ones. It does not occur with smaller sample sizes, such as 2000. Although I can work around this issue using other Python functions, I would like to better understand why this happens and learn general principles for avoiding such issues when using Hail functions in the future. I have included the full script and error messages below to provide all necessary information. If there is a more efficient way to share this information, please let me know.
Thank you very much for your assistance.
Best,
Zheyang
Hail version: 0.2.129-41126be2df04
Python version: 3.9.6
Java version:
openjdk version "11.0.22" 2024-01-16 LTS
OpenJDK Runtime Environment Zulu11.70+15-CA (build 11.0.22+7-LTS)
OpenJDK 64-Bit Server VM Zulu11.70+15-CA (build 11.0.22+7-LTS, mixed mode)
Script:
‘’’
# Minimal reproduction of a ConnectionError seen when wrapping the result of
# mt.aggregate_cols(hl.agg.collect(...)) in hl.nd.array() on a dataset with
# 442075 samples. The steps mirror a simplified version of the data
# processing flow of Hail's _linear_skat function.
import hail as hl

hl.init()

# Load the matrix table.
# The larger dataset (7944 SNPs and 442075 samples, 76.4M bytes on disk)
# leads to the error messages when running the code below.
mt = hl.read_matrix_table('/Users/zheyangwu/Desktop/temp/mt_chr22')
# A smaller dataset (7944 SNPs and 2000 samples, 578k bytes) does not cause
# the error messages.
# mt = hl.read_matrix_table('/Users/zheyangwu/Desktop/temp/mt_1k_cases_controls_chr22')
mt.count()

# Data processing flow of the _linear_skat function in Hail, heavily
# simplified to focus on the essential code that leads to the error messages.
covariates = [1.0]

# Redefine the matrix table and set up the column fields.
mt = mt._select_all(
    # covariates extends [1.0] to [1.0, 1.0, 1.0, ...] for all samples.
    col_exprs=dict(covariates=covariates)
)
mt.covariates.show()  # This works fine.

# Retrieve covmat, keeping the result as a Hail expression rather than
# localizing it to Python (_localize=False).
covmat = mt.aggregate_cols(
    hl.agg.collect(mt.covariates.map(hl.float)),  # matrix of covariates
    _localize=False,
)
covmat.show()  # This works fine.

# Comparison case: the same one-column 2D array built from a Python literal.
# 442075 is the number of samples in the larger dataset.
covmat2 = hl.nd.array(hl.literal([[1.0]] * 442075))
covmat2.show()  # This works fine if run before the code below.

covmat = hl.nd.array(covmat)
# !!! This gets the ConnectionError messages when using the larger dataset
# with 442,075 samples.
covmat.show()
‘’’
Error message:
‘’’
RemoteDisconnected Traceback (most recent call last)
File ~/Library/Python/3.9/lib/python/site-packages/urllib3/connectionpool.py:715, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
714 # Make the request on the httplib connection object.
→ 715 httplib_response = self._make_request(
716 conn,
717 method,
718 url,
719 timeout=timeout_obj,
720 body=body,
721 headers=headers,
722 chunked=chunked,
723 )
725 # If we're going to release the connection in ``finally:``, then
726 # the response doesn’t need to know about the connection. Otherwise
727 # it will also try to release it and we’ll have a double-release
728 # mess.
File ~/Library/Python/3.9/lib/python/site-packages/urllib3/connectionpool.py:467, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
463 except BaseException as e:
464 # Remove the TypeError from the exception chain in
465 # Python 3 (including for exceptions like SystemExit).
466 # Otherwise it looks like a bug in the code.
→ 467 six.raise_from(e, None)
468 except (SocketTimeout, BaseSSLError, SocketError) as e:
File <string>:3, in raise_from(value, from_value)
File ~/Library/Python/3.9/lib/python/site-packages/urllib3/connectionpool.py:462, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
461 try:
→ 462 httplib_response = conn.getresponse()
463 except BaseException as e:
464 # Remove the TypeError from the exception chain in
465 # Python 3 (including for exceptions like SystemExit).
466 # Otherwise it looks like a bug in the code.
File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/http/client.py:1349, in HTTPConnection.getresponse(self)
1348 try:
→ 1349 response.begin()
1350 except ConnectionError:
File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/http/client.py:316, in HTTPResponse.begin(self)
315 while True:
→ 316 version, status, reason = self._read_status()
317 if status != CONTINUE:
File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/http/client.py:285, in HTTPResponse._read_status(self)
282 if not line:
283 # Presumably, the server closed the connection before
284 # sending a valid response.
→ 285 raise RemoteDisconnected(“Remote end closed connection without”
286 " response")
287 try:
RemoteDisconnected: Remote end closed connection without response
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
File ~/Library/Python/3.9/lib/python/site-packages/requests/adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
485 try:
→ 486 resp = conn.urlopen(
487 method=request.method,
488 url=url,
489 body=request.body,
490 headers=request.headers,
491 redirect=False,
492 assert_same_host=False,
493 preload_content=False,
494 decode_content=False,
495 retries=self.max_retries,
496 timeout=timeout,
497 chunked=chunked,
498 )
500 except (ProtocolError, OSError) as err:
File ~/Library/Python/3.9/lib/python/site-packages/urllib3/connectionpool.py:799, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
797 e = ProtocolError(“Connection aborted.”, e)
→ 799 retries = retries.increment(
800 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
801 )
802 retries.sleep()
File ~/Library/Python/3.9/lib/python/site-packages/urllib3/util/retry.py:550, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
549 if read is False or not self._is_method_retryable(method):
→ 550 raise six.reraise(type(error), error, _stacktrace)
551 elif read is not None:
File ~/Library/Python/3.9/lib/python/site-packages/urllib3/packages/six.py:769, in reraise(tp, value, tb)
768 if value.__traceback__ is not tb:
→ 769 raise value.with_traceback(tb)
770 raise value
File ~/Library/Python/3.9/lib/python/site-packages/urllib3/connectionpool.py:715, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
714 # Make the request on the httplib connection object.
→ 715 httplib_response = self._make_request(
716 conn,
717 method,
718 url,
719 timeout=timeout_obj,
720 body=body,
721 headers=headers,
722 chunked=chunked,
723 )
725 # If we're going to release the connection in ``finally:``, then
726 # the response doesn’t need to know about the connection. Otherwise
727 # it will also try to release it and we’ll have a double-release
728 # mess.
File ~/Library/Python/3.9/lib/python/site-packages/urllib3/connectionpool.py:467, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
463 except BaseException as e:
464 # Remove the TypeError from the exception chain in
465 # Python 3 (including for exceptions like SystemExit).
466 # Otherwise it looks like a bug in the code.
→ 467 six.raise_from(e, None)
468 except (SocketTimeout, BaseSSLError, SocketError) as e:
File <string>:3, in raise_from(value, from_value)
File ~/Library/Python/3.9/lib/python/site-packages/urllib3/connectionpool.py:462, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
461 try:
→ 462 httplib_response = conn.getresponse()
463 except BaseException as e:
464 # Remove the TypeError from the exception chain in
465 # Python 3 (including for exceptions like SystemExit).
466 # Otherwise it looks like a bug in the code.
File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/http/client.py:1349, in HTTPConnection.getresponse(self)
1348 try:
→ 1349 response.begin()
1350 except ConnectionError:
File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/http/client.py:316, in HTTPResponse.begin(self)
315 while True:
→ 316 version, status, reason = self._read_status()
317 if status != CONTINUE:
File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/http/client.py:285, in HTTPResponse._read_status(self)
282 if not line:
283 # Presumably, the server closed the connection before
284 # sending a valid response.
→ 285 raise RemoteDisconnected(“Remote end closed connection without”
286 " response")
287 try:
ProtocolError: (‘Connection aborted.’, RemoteDisconnected(‘Remote end closed connection without response’))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
File ~/Library/Python/3.9/lib/python/site-packages/IPython/core/formatters.py:708, in PlainTextFormatter.__call__(self, obj)
701 stream = StringIO()
702 printer = pretty.RepresentationPrinter(stream, self.verbose,
703 self.max_width, self.newline,
704 max_seq_length=self.max_seq_length,
705 singleton_pprinters=self.singleton_printers,
706 type_pprinters=self.type_printers,
707 deferred_pprinters=self.deferred_printers)
→ 708 printer.pretty(obj)
709 printer.flush()
710 return stream.getvalue()
File ~/Library/Python/3.9/lib/python/site-packages/IPython/lib/pretty.py:410, in RepresentationPrinter.pretty(self, obj)
407 return meth(obj, self, cycle)
408 if cls is not object \
409 and callable(cls.__dict__.get('__repr__')):
→ 410 return _repr_pprint(obj, self, cycle)
412 return _default_pprint(obj, self, cycle)
413 finally:
File ~/Library/Python/3.9/lib/python/site-packages/IPython/lib/pretty.py:778, in _repr_pprint(obj, p, cycle)
776 """A pprint that just redirects to the normal repr function."""
777 # Find newlines and replace them with p.break()
→ 778 output = repr(obj)
779 lines = output.splitlines()
780 with p.group():
File ~/Library/Python/3.9/lib/python/site-packages/hail/table.py:2157, in Table._Show.__repr__(self)
2156 def __repr__(self):
→ 2157 return self.__str__()
File ~/Library/Python/3.9/lib/python/site-packages/hail/table.py:2154, in Table._Show.__str__(self)
2153 def __str__(self):
→ 2154 return self._ascii_str()
File ~/Library/Python/3.9/lib/python/site-packages/hail/table.py:2180, in Table._Show._ascii_str(self)
2177 return s[: truncate - 3] + “…”
2178 return s
→ 2180 rows, has_more, dtype = self.data()
2181 fields = list(dtype)
2182 trunc_fields = [trunc(f) for f in fields]
File ~/Library/Python/3.9/lib/python/site-packages/hail/table.py:2164, in Table._Show.data(self)
2162 row_dtype = t.row.dtype
2163 t = t.select(**{k: hl._showstr(v) for (k, v) in t.row.items()})
→ 2164 rows, has_more = t._take_n(self.n)
2165 self._data = (rows, has_more, row_dtype)
2166 return self._data
File ~/Library/Python/3.9/lib/python/site-packages/hail/table.py:2310, in Table._take_n(self, n)
2308 has_more = False
2309 else:
→ 2310 rows = self.take(n + 1)
2311 has_more = len(rows) > n
2312 rows = rows[:n]
File :2, in take(self, n, _localize)
File ~/Library/Python/3.9/lib/python/site-packages/hail/typecheck/check.py:585, in _make_dec..wrapper(__original_func, *args, **kwargs)
582 @decorator
583 def wrapper(original_func: Callable[…, T], *args, **kwargs) → T:
584 args, kwargs = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
→ 585 return original_func(*args, **kwargs)
File ~/Library/Python/3.9/lib/python/site-packages/hail/table.py:3027, in Table.take(self, n, _localize)
2993 @typecheck_method(n=int, _localize=bool)
2994 def take(self, n, _localize=True):
2995 """Collect the first `n` rows of the table into a local list.
2996
2997 Examples
(…)
3024 List of row structs.
3025 “””
→ 3027 return self.head(n).collect(_localize)
File :2, in collect(self, _localize, _timed)
File ~/Library/Python/3.9/lib/python/site-packages/hail/typecheck/check.py:585, in _make_dec..wrapper(__original_func, *args, **kwargs)
582 @decorator
583 def wrapper(original_func: Callable[…, T], *args, **kwargs) → T:
584 args, kwargs = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
→ 585 return original_func(*args, **kwargs)
File ~/Library/Python/3.9/lib/python/site-packages/hail/table.py:2814, in Table.collect(self, _localize, _timed)
2812 e = construct_expr(rows_ir, hl.tarray(t.row.dtype))
2813 if _localize:
→ 2814 return Env.backend().execute(e._ir, timed=_timed)
2815 else:
2816 return e
File ~/Library/Python/3.9/lib/python/site-packages/hail/backend/backend.py:188, in Backend.execute(self, ir, timed)
186 payload = ExecutePayload(self._render_ir(ir), ‘{“name”:“StreamBufferSpec”}’, timed)
187 try:
→ 188 result, timings = self._rpc(ActionTag.EXECUTE, payload)
189 except FatalError as e:
190 raise e.maybe_user_error(ir) from None
File ~/Library/Python/3.9/lib/python/site-packages/hail/backend/py4j_backend.py:218, in Py4JBackend._rpc(self, action, payload)
216 path = action_routes[action]
217 port = self._backend_server_port
→ 218 resp = self._requests_session.post(f'http://localhost:{port}{path}', data=data)
219 if resp.status_code >= 400:
220 error_json = orjson.loads(resp.content)
File ~/Library/Python/3.9/lib/python/site-packages/requests/sessions.py:637, in Session.post(self, url, data, json, **kwargs)
626 def post(self, url, data=None, json=None, **kwargs):
627 r"""Sends a POST request. Returns :class:`Response` object.
628
629 :param url: URL for the new :class:`Request` object.
(…)
634 :rtype: requests.Response
635 “””
→ 637 return self.request(“POST”, url, data=data, json=json, **kwargs)
File ~/Library/Python/3.9/lib/python/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
584 send_kwargs = {
585 “timeout”: timeout,
586 “allow_redirects”: allow_redirects,
587 }
588 send_kwargs.update(settings)
→ 589 resp = self.send(prep, **send_kwargs)
591 return resp
File ~/Library/Python/3.9/lib/python/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
700 start = preferred_clock()
702 # Send the request
→ 703 r = adapter.send(request, **kwargs)
705 # Total elapsed time of the request (approximately)
706 elapsed = preferred_clock() - start
File ~/Library/Python/3.9/lib/python/site-packages/requests/adapters.py:501, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
486 resp = conn.urlopen(
487 method=request.method,
488 url=url,
(…)
497 chunked=chunked,
498 )
500 except (ProtocolError, OSError) as err:
→ 501 raise ConnectionError(err, request=request)
503 except MaxRetryError as e:
504 if isinstance(e.reason, ConnectTimeoutError):
505 # TODO: Remove this in 3.0.0: see #2811
ConnectionError: (‘Connection aborted.’, RemoteDisconnected(‘Remote end closed connection without response’))
‘’’