LineTooLong Error

Hello,

In All of Us, I am trying to calculate a polygenic risk score (PRS) with around 40,000 SNPs. I ran the following code on a 2/30 cluster, each node with 16 CPUs and 104 GB of RAM, but I get this error:


LineTooLong Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:703, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
702 # Make the request on the httplib connection object.
→ 703 httplib_response = self._make_request(
704 conn,
705 method,
706 url,
707 timeout=timeout_obj,
708 body=body,
709 headers=headers,
710 chunked=chunked,
711 )
713 # If we're going to release the connection in ``finally:``, then
714 # the response doesn't need to know about the connection. Otherwise
715 # it will also try to release it and we'll have a double-release
716 # mess.

File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:449, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
445 except BaseException as e:
446 # Remove the TypeError from the exception chain in
447 # Python 3 (including for exceptions like SystemExit).
448 # Otherwise it looks like a bug in the code.
→ 449 six.raise_from(e, None)
450 except (SocketTimeout, BaseSSLError, SocketError) as e:

File <string>:3, in raise_from(value, from_value)

File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:444, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
443 try:
→ 444 httplib_response = conn.getresponse()
445 except BaseException as e:
446 # Remove the TypeError from the exception chain in
447 # Python 3 (including for exceptions like SystemExit).
448 # Otherwise it looks like a bug in the code.

File /opt/conda/lib/python3.10/http/client.py:1375, in HTTPConnection.getresponse(self)
1374 try:
→ 1375 response.begin()
1376 except ConnectionError:

File /opt/conda/lib/python3.10/http/client.py:337, in HTTPResponse.begin(self)
335 raise UnknownProtocol(version)
→ 337 self.headers = self.msg = parse_headers(self.fp)
339 if self.debuglevel > 0:

File /opt/conda/lib/python3.10/http/client.py:234, in parse_headers(fp, _class)
225 """Parses only RFC2822 headers from a file pointer.
226
227 email Parser wants to see strings rather than bytes.
(...)
232
233 """
→ 234 headers = _read_headers(fp)
235 hstring = b''.join(headers).decode('iso-8859-1')

File /opt/conda/lib/python3.10/http/client.py:216, in _read_headers(fp)
215 if len(line) > _MAXLINE:
→ 216 raise LineTooLong("header line")
217 headers.append(line)

LineTooLong: got more than 1048576 bytes when reading header line

The code that I used is:

print("filtering variants")
vds = hl.vds.filter_intervals(vds, locus_list)
print("variants after filter:", vds.variant_data.count())
print("filtering samples")
vds = hl.vds.filter_samples(vds, samples, keep=True, remove_dead_alleles = True)
print("after sample filter:", vds.variant_data.count())
vds = vds.checkpoint(f'vds_checkpoint_{prs_name}.vds')
vds = hl.vds.split_multi(vds)
mt = hl.vds.to_dense_mt(vds)
mt.count()

Can you please help me with that?

Your stack trace includes no Hail code. Is this the whole traceback? This error comes from a limitation in Python's HTTP handling, so we're going to need more detail.

You have a lot of print statements. What output are you seeing before this error?

Thank you for the reply. The error happens when I call the hl.vds.filter_intervals function. I have a PRS with 40,000 SNPs, and the locus_list is quite long because the SNPs are scattered sparsely across the genome. This is how I generate the locus list:

test_intervals = [f"chr{row.hm_chr}:{int(row.hm_pos)}-chr{row.hm_chr}:{int(row.hm_pos)+1}" 
                 for idx, row in tqdm(prs.iterrows(), total= len(prs))]
The complete traceback is this:
LineTooLong                               Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:703, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    702 # Make the request on the httplib connection object.
--> 703 httplib_response = self._make_request(
    704     conn,
    705     method,
    706     url,
    707     timeout=timeout_obj,
    708     body=body,
    709     headers=headers,
    710     chunked=chunked,
    711 )
    713 # If we're going to release the connection in ``finally:``, then
    714 # the response doesn't need to know about the connection. Otherwise
    715 # it will also try to release it and we'll have a double-release
    716 # mess.

File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:449, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    445         except BaseException as e:
    446             # Remove the TypeError from the exception chain in
    447             # Python 3 (including for exceptions like SystemExit).
    448             # Otherwise it looks like a bug in the code.
--> 449             six.raise_from(e, None)
    450 except (SocketTimeout, BaseSSLError, SocketError) as e:

File <string>:3, in raise_from(value, from_value)

File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:444, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    443 try:
--> 444     httplib_response = conn.getresponse()
    445 except BaseException as e:
    446     # Remove the TypeError from the exception chain in
    447     # Python 3 (including for exceptions like SystemExit).
    448     # Otherwise it looks like a bug in the code.

File /opt/conda/lib/python3.10/http/client.py:1375, in HTTPConnection.getresponse(self)
   1374 try:
-> 1375     response.begin()
   1376 except ConnectionError:

File /opt/conda/lib/python3.10/http/client.py:337, in HTTPResponse.begin(self)
    335     raise UnknownProtocol(version)
--> 337 self.headers = self.msg = parse_headers(self.fp)
    339 if self.debuglevel > 0:

File /opt/conda/lib/python3.10/http/client.py:234, in parse_headers(fp, _class)
    225 """Parses only RFC2822 headers from a file pointer.
    226 
    227 email Parser wants to see strings rather than bytes.
   (...)
    232 
    233 """
--> 234 headers = _read_headers(fp)
    235 hstring = b''.join(headers).decode('iso-8859-1')

File /opt/conda/lib/python3.10/http/client.py:216, in _read_headers(fp)
    215 if len(line) > _MAXLINE:
--> 216     raise LineTooLong("header line")
    217 headers.append(line)

LineTooLong: got more than 1048576 bytes when reading header line

During handling of the above exception, another exception occurred:

ProtocolError                             Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/requests/adapters.py:667, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    666 try:
--> 667     resp = conn.urlopen(
    668         method=request.method,
    669         url=url,
    670         body=request.body,
    671         headers=request.headers,
    672         redirect=False,
    673         assert_same_host=False,
    674         preload_content=False,
    675         decode_content=False,
    676         retries=self.max_retries,
    677         timeout=timeout,
    678         chunked=chunked,
    679     )
    681 except (ProtocolError, OSError) as err:

File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:787, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    785     e = ProtocolError("Connection aborted.", e)
--> 787 retries = retries.increment(
    788     method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
    789 )
    790 retries.sleep()

File /opt/conda/lib/python3.10/site-packages/urllib3/util/retry.py:550, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
    549 if read is False or not self._is_method_retryable(method):
--> 550     raise six.reraise(type(error), error, _stacktrace)
    551 elif read is not None:

File /opt/conda/lib/python3.10/site-packages/urllib3/packages/six.py:769, in reraise(tp, value, tb)
    768 if value.__traceback__ is not tb:
--> 769     raise value.with_traceback(tb)
    770 raise value

File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:703, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
    702 # Make the request on the httplib connection object.
--> 703 httplib_response = self._make_request(
    704     conn,
    705     method,
    706     url,
    707     timeout=timeout_obj,
    708     body=body,
    709     headers=headers,
    710     chunked=chunked,
    711 )
    713 # If we're going to release the connection in ``finally:``, then
    714 # the response doesn't need to know about the connection. Otherwise
    715 # it will also try to release it and we'll have a double-release
    716 # mess.

File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:449, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    445         except BaseException as e:
    446             # Remove the TypeError from the exception chain in
    447             # Python 3 (including for exceptions like SystemExit).
    448             # Otherwise it looks like a bug in the code.
--> 449             six.raise_from(e, None)
    450 except (SocketTimeout, BaseSSLError, SocketError) as e:

File <string>:3, in raise_from(value, from_value)

File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:444, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
    443 try:
--> 444     httplib_response = conn.getresponse()
    445 except BaseException as e:
    446     # Remove the TypeError from the exception chain in
    447     # Python 3 (including for exceptions like SystemExit).
    448     # Otherwise it looks like a bug in the code.

File /opt/conda/lib/python3.10/http/client.py:1375, in HTTPConnection.getresponse(self)
   1374 try:
-> 1375     response.begin()
   1376 except ConnectionError:

File /opt/conda/lib/python3.10/http/client.py:337, in HTTPResponse.begin(self)
    335     raise UnknownProtocol(version)
--> 337 self.headers = self.msg = parse_headers(self.fp)
    339 if self.debuglevel > 0:

File /opt/conda/lib/python3.10/http/client.py:234, in parse_headers(fp, _class)
    225 """Parses only RFC2822 headers from a file pointer.
    226 
    227 email Parser wants to see strings rather than bytes.
   (...)
    232 
    233 """
--> 234 headers = _read_headers(fp)
    235 hstring = b''.join(headers).decode('iso-8859-1')

File /opt/conda/lib/python3.10/http/client.py:216, in _read_headers(fp)
    215 if len(line) > _MAXLINE:
--> 216     raise LineTooLong("header line")
    217 headers.append(line)

ProtocolError: ('Connection aborted.', LineTooLong('got more than 1048576 bytes when reading header line'))

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
Cell In[12], line 2
      1 print("filtering variants")
----> 2 vds = hl.vds.filter_intervals(vds, locus_list[:5000])
      3 print("variants after filter:", vds.variant_data.count())
      4 print("filtering samples")

File <decorator-gen-1870>:2, in filter_intervals(vds, intervals, split_reference_blocks, keep)

File /opt/conda/lib/python3.10/site-packages/hail/typecheck/check.py:585, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
    582 @decorator
    583 def wrapper(__original_func: Callable[..., T], *args, **kwargs) -> T:
    584     args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585     return __original_func(*args_, **kwargs_)

File /opt/conda/lib/python3.10/site-packages/hail/vds/methods.py:623, in filter_intervals(vds, intervals, split_reference_blocks, keep)
    621 if split_reference_blocks and not keep:
    622     raise ValueError("'filter_intervals': cannot use 'split_reference_blocks' with keep=False")
--> 623 return _parameterized_filter_intervals(
    624     vds, intervals, keep=keep, mode='split_at_boundaries' if split_reference_blocks else 'variants_only'
    625 )

File <decorator-gen-1866>:2, in _parameterized_filter_intervals(vds, intervals, keep, mode)

File /opt/conda/lib/python3.10/site-packages/hail/typecheck/check.py:585, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
    582 @decorator
    583 def wrapper(__original_func: Callable[..., T], *args, **kwargs) -> T:
    584     args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585     return __original_func(*args_, **kwargs_)

File /opt/conda/lib/python3.10/site-packages/hail/vds/methods.py:495, in _parameterized_filter_intervals(vds, intervals, keep, mode)
    489     max_len = hl.eval(vds.reference_data.index_globals()[rbml])
    490     ref_intervals = intervals.map(
    491         lambda interval: hl.interval(
    492             interval.start - (max_len - 1), interval.end, interval.includes_start, interval.includes_end
    493         )
    494     )
--> 495     reference_data = hl.filter_intervals(reference_data, ref_intervals, keep)
    496 else:
    497     warning(
    498         "'hl.vds.filter_intervals': filtering intervals without a known max reference block length"
    499         "\n  (computed by `hl.vds.store_ref_block_max_length` or 'hl.vds.truncate_reference_blocks')"
    500         "\n  requires a full pass over the reference data (expensive!)"
    501     )

File <decorator-gen-1632>:2, in filter_intervals(ds, intervals, keep)

File /opt/conda/lib/python3.10/site-packages/hail/typecheck/check.py:585, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
    582 @decorator
    583 def wrapper(__original_func: Callable[..., T], *args, **kwargs) -> T:
    584     args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585     return __original_func(*args_, **kwargs_)

File /opt/conda/lib/python3.10/site-packages/hail/methods/misc.py:415, in filter_intervals(ds, intervals, keep)
    412     else:
    413         return interval
--> 415 intervals = hl.eval(intervals)
    416 intervals = [wrap_input(i) for i in intervals]
    418 if isinstance(ds, MatrixTable):

File <decorator-gen-570>:2, in eval(expression)

File /opt/conda/lib/python3.10/site-packages/hail/typecheck/check.py:585, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
    582 @decorator
    583 def wrapper(__original_func: Callable[..., T], *args, **kwargs) -> T:
    584     args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585     return __original_func(*args_, **kwargs_)

File /opt/conda/lib/python3.10/site-packages/hail/expr/expressions/expression_utils.py:194, in eval(expression)
    167 @typecheck(expression=expr_any)
    168 def eval(expression):
    169     """Evaluate a Hail expression, returning the result.
    170 
    171     This method is extremely useful for learning about Hail expressions and
   (...)
    192     Any
    193     """
--> 194     return eval_timed(expression)[0]

File <decorator-gen-568>:2, in eval_timed(expression)

File /opt/conda/lib/python3.10/site-packages/hail/typecheck/check.py:585, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
    582 @decorator
    583 def wrapper(__original_func: Callable[..., T], *args, **kwargs) -> T:
    584     args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585     return __original_func(*args_, **kwargs_)

File /opt/conda/lib/python3.10/site-packages/hail/expr/expressions/expression_utils.py:164, in eval_timed(expression)
    161     uid = Env.get_uid()
    162     ir = expression._indices.source.select_globals(**{uid: expression}).index_globals()[uid]._ir
--> 164 return Env.backend().execute(MakeTuple([ir]), timed=True)[0]

File /opt/conda/lib/python3.10/site-packages/hail/backend/spark_backend.py:226, in SparkBackend.execute(self, ir, timed)
    223     except Exception as fatal:
    224         raise err from fatal
--> 226 raise err

File /opt/conda/lib/python3.10/site-packages/hail/backend/spark_backend.py:218, in SparkBackend.execute(self, ir, timed)
    216 def execute(self, ir: BaseIR, timed: bool = False) -> Any:
    217     try:
--> 218         return super().execute(ir, timed)
    219     except Exception as err:
    220         if self._copy_log_on_error:

File /opt/conda/lib/python3.10/site-packages/hail/backend/backend.py:188, in Backend.execute(self, ir, timed)
    186 payload = ExecutePayload(self._render_ir(ir), '{"name":"StreamBufferSpec"}', timed)
    187 try:
--> 188     result, timings = self._rpc(ActionTag.EXECUTE, payload)
    189 except FatalError as e:
    190     raise e.maybe_user_error(ir) from None

File /opt/conda/lib/python3.10/site-packages/hail/backend/py4j_backend.py:218, in Py4JBackend._rpc(self, action, payload)
    216 path = action_routes[action]
    217 port = self._backend_server_port
--> 218 resp = self._requests_session.post(f'http://localhost:{port}{path}', data=data)
    219 if resp.status_code >= 400:
    220     error_json = orjson.loads(resp.content)

File /opt/conda/lib/python3.10/site-packages/requests/sessions.py:637, in Session.post(self, url, data, json, **kwargs)
    626 def post(self, url, data=None, json=None, **kwargs):
    627     r"""Sends a POST request. Returns :class:`Response` object.
    628 
    629     :param url: URL for the new :class:`Request` object.
   (...)
    634     :rtype: requests.Response
    635     """
--> 637     return self.request("POST", url, data=data, json=json, **kwargs)

File /opt/conda/lib/python3.10/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    584 send_kwargs = {
    585     "timeout": timeout,
    586     "allow_redirects": allow_redirects,
    587 }
    588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
    591 return resp

File /opt/conda/lib/python3.10/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
    700 start = preferred_clock()
    702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
    705 # Total elapsed time of the request (approximately)
    706 elapsed = preferred_clock() - start

File /opt/conda/lib/python3.10/site-packages/requests/adapters.py:682, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    667     resp = conn.urlopen(
    668         method=request.method,
    669         url=url,
   (...)
    678         chunked=chunked,
    679     )
    681 except (ProtocolError, OSError) as err:
--> 682     raise ConnectionError(err, request=request)
    684 except MaxRetryError as e:
    685     if isinstance(e.reason, ConnectTimeoutError):
    686         # TODO: Remove this in 3.0.0: see #2811

ConnectionError: ('Connection aborted.', LineTooLong('got more than 1048576 bytes when reading header line'))