I tried performing a `union_rows` operation on two matrix tables and got the following error. I am not sure what the error means or how to proceed.
---------------------------------------------------------------------------
LineTooLong Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:703, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
702 # Make the request on the httplib connection object.
--> 703 httplib_response = self._make_request(
704 conn,
705 method,
706 url,
707 timeout=timeout_obj,
708 body=body,
709 headers=headers,
710 chunked=chunked,
711 )
713 # If we're going to release the connection in ``finally:``, then
714 # the response doesn't need to know about the connection. Otherwise
715 # it will also try to release it and we'll have a double-release
716 # mess.
File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:449, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
445 except BaseException as e:
446 # Remove the TypeError from the exception chain in
447 # Python 3 (including for exceptions like SystemExit).
448 # Otherwise it looks like a bug in the code.
--> 449 six.raise_from(e, None)
450 except (SocketTimeout, BaseSSLError, SocketError) as e:
File <string>:3, in raise_from(value, from_value)
File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:444, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
443 try:
--> 444 httplib_response = conn.getresponse()
445 except BaseException as e:
446 # Remove the TypeError from the exception chain in
447 # Python 3 (including for exceptions like SystemExit).
448 # Otherwise it looks like a bug in the code.
File /opt/conda/lib/python3.10/http/client.py:1375, in HTTPConnection.getresponse(self)
1374 try:
-> 1375 response.begin()
1376 except ConnectionError:
File /opt/conda/lib/python3.10/http/client.py:337, in HTTPResponse.begin(self)
335 raise UnknownProtocol(version)
--> 337 self.headers = self.msg = parse_headers(self.fp)
339 if self.debuglevel > 0:
File /opt/conda/lib/python3.10/http/client.py:234, in parse_headers(fp, _class)
225 """Parses only RFC2822 headers from a file pointer.
226
227 email Parser wants to see strings rather than bytes.
(...)
232
233 """
--> 234 headers = _read_headers(fp)
235 hstring = b''.join(headers).decode('iso-8859-1')
File /opt/conda/lib/python3.10/http/client.py:216, in _read_headers(fp)
215 if len(line) > _MAXLINE:
--> 216 raise LineTooLong("header line")
217 headers.append(line)
LineTooLong: got more than 1048576 bytes when reading header line
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
File /opt/conda/lib/python3.10/site-packages/requests/adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
485 try:
--> 486 resp = conn.urlopen(
487 method=request.method,
488 url=url,
489 body=request.body,
490 headers=request.headers,
491 redirect=False,
492 assert_same_host=False,
493 preload_content=False,
494 decode_content=False,
495 retries=self.max_retries,
496 timeout=timeout,
497 chunked=chunked,
498 )
500 except (ProtocolError, OSError) as err:
File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:787, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
785 e = ProtocolError("Connection aborted.", e)
--> 787 retries = retries.increment(
788 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
789 )
790 retries.sleep()
File /opt/conda/lib/python3.10/site-packages/urllib3/util/retry.py:550, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
549 if read is False or not self._is_method_retryable(method):
--> 550 raise six.reraise(type(error), error, _stacktrace)
551 elif read is not None:
File /opt/conda/lib/python3.10/site-packages/urllib3/packages/six.py:769, in reraise(tp, value, tb)
768 if value.__traceback__ is not tb:
--> 769 raise value.with_traceback(tb)
770 raise value
File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:703, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
702 # Make the request on the httplib connection object.
--> 703 httplib_response = self._make_request(
704 conn,
705 method,
706 url,
707 timeout=timeout_obj,
708 body=body,
709 headers=headers,
710 chunked=chunked,
711 )
713 # If we're going to release the connection in ``finally:``, then
714 # the response doesn't need to know about the connection. Otherwise
715 # it will also try to release it and we'll have a double-release
716 # mess.
File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:449, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
445 except BaseException as e:
446 # Remove the TypeError from the exception chain in
447 # Python 3 (including for exceptions like SystemExit).
448 # Otherwise it looks like a bug in the code.
--> 449 six.raise_from(e, None)
450 except (SocketTimeout, BaseSSLError, SocketError) as e:
File <string>:3, in raise_from(value, from_value)
File /opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py:444, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
443 try:
--> 444 httplib_response = conn.getresponse()
445 except BaseException as e:
446 # Remove the TypeError from the exception chain in
447 # Python 3 (including for exceptions like SystemExit).
448 # Otherwise it looks like a bug in the code.
File /opt/conda/lib/python3.10/http/client.py:1375, in HTTPConnection.getresponse(self)
1374 try:
-> 1375 response.begin()
1376 except ConnectionError:
File /opt/conda/lib/python3.10/http/client.py:337, in HTTPResponse.begin(self)
335 raise UnknownProtocol(version)
--> 337 self.headers = self.msg = parse_headers(self.fp)
339 if self.debuglevel > 0:
File /opt/conda/lib/python3.10/http/client.py:234, in parse_headers(fp, _class)
225 """Parses only RFC2822 headers from a file pointer.
226
227 email Parser wants to see strings rather than bytes.
(...)
232
233 """
--> 234 headers = _read_headers(fp)
235 hstring = b''.join(headers).decode('iso-8859-1')
File /opt/conda/lib/python3.10/http/client.py:216, in _read_headers(fp)
215 if len(line) > _MAXLINE:
--> 216 raise LineTooLong("header line")
217 headers.append(line)
ProtocolError: ('Connection aborted.', LineTooLong('got more than 1048576 bytes when reading header line'))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
Cell In[28], line 1
----> 1 one_thous_vcf = one_thous_vcf.union_rows(kg_variants_not_in_samples)
2 #combined_vcf = vcf_mt.union_cols(one_thous_vcf)
3 #combined_vcf.count()
File <decorator-gen-1506>:2, in union_rows(_check_cols, *datasets)
File /opt/conda/lib/python3.10/site-packages/hail/typecheck/check.py:585, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
582 @decorator
583 def wrapper(__original_func: Callable[..., T], *args, **kwargs) -> T:
584 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585 return __original_func(*args_, **kwargs_)
File /opt/conda/lib/python3.10/site-packages/hail/matrixtable.py:3958, in MatrixTable.union_rows(_check_cols, *datasets)
3954 raise ValueError(
3955 error_msg.format("col key types", 0, first.col_key.dtype, i + 1, next.col_key.dtype)
3956 )
3957 if _check_cols:
-> 3958 wrong_keys = hl.eval(
3959 hl.rbind(
3960 first.col_key.collect(_localize=False),
3961 lambda first_keys: (
3962 hl.enumerate([mt.col_key.collect(_localize=False) for mt in datasets[1:]]).find(
3963 lambda x: ~(x[1] == first_keys)
3964 )[0]
3965 ),
3966 )
3967 )
3968 if wrong_keys is not None:
3969 raise ValueError(
3970 f"'MatrixTable.union_rows' expects all datasets to have the same columns. "
3971 f"Datasets 0 and {wrong_keys + 1} have different columns (or possibly different order)."
3972 )
File <decorator-gen-570>:2, in eval(expression)
File /opt/conda/lib/python3.10/site-packages/hail/typecheck/check.py:585, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
582 @decorator
583 def wrapper(__original_func: Callable[..., T], *args, **kwargs) -> T:
584 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585 return __original_func(*args_, **kwargs_)
File /opt/conda/lib/python3.10/site-packages/hail/expr/expressions/expression_utils.py:194, in eval(expression)
167 @typecheck(expression=expr_any)
168 def eval(expression):
169 """Evaluate a Hail expression, returning the result.
170
171 This method is extremely useful for learning about Hail expressions and
(...)
192 Any
193 """
--> 194 return eval_timed(expression)[0]
File <decorator-gen-568>:2, in eval_timed(expression)
File /opt/conda/lib/python3.10/site-packages/hail/typecheck/check.py:585, in _make_dec.<locals>.wrapper(__original_func, *args, **kwargs)
582 @decorator
583 def wrapper(__original_func: Callable[..., T], *args, **kwargs) -> T:
584 args_, kwargs_ = check_all(__original_func, args, kwargs, checkers, is_method=is_method)
--> 585 return __original_func(*args_, **kwargs_)
File /opt/conda/lib/python3.10/site-packages/hail/expr/expressions/expression_utils.py:164, in eval_timed(expression)
161 uid = Env.get_uid()
162 ir = expression._indices.source.select_globals(**{uid: expression}).index_globals()[uid]._ir
--> 164 return Env.backend().execute(MakeTuple([ir]), timed=True)[0]
File /opt/conda/lib/python3.10/site-packages/hail/backend/spark_backend.py:226, in SparkBackend.execute(self, ir, timed)
223 except Exception as fatal:
224 raise err from fatal
--> 226 raise err
File /opt/conda/lib/python3.10/site-packages/hail/backend/spark_backend.py:218, in SparkBackend.execute(self, ir, timed)
216 def execute(self, ir: BaseIR, timed: bool = False) -> Any:
217 try:
--> 218 return super().execute(ir, timed)
219 except Exception as err:
220 if self._copy_log_on_error:
File /opt/conda/lib/python3.10/site-packages/hail/backend/backend.py:188, in Backend.execute(self, ir, timed)
186 payload = ExecutePayload(self._render_ir(ir), '{"name":"StreamBufferSpec"}', timed)
187 try:
--> 188 result, timings = self._rpc(ActionTag.EXECUTE, payload)
189 except FatalError as e:
190 raise e.maybe_user_error(ir) from None
File /opt/conda/lib/python3.10/site-packages/hail/backend/py4j_backend.py:218, in Py4JBackend._rpc(self, action, payload)
216 path = action_routes[action]
217 port = self._backend_server_port
--> 218 resp = self._requests_session.post(f'http://localhost:{port}{path}', data=data)
219 if resp.status_code >= 400:
220 error_json = orjson.loads(resp.content)
File /opt/conda/lib/python3.10/site-packages/requests/sessions.py:637, in Session.post(self, url, data, json, **kwargs)
626 def post(self, url, data=None, json=None, **kwargs):
627 r"""Sends a POST request. Returns :class:`Response` object.
628
629 :param url: URL for the new :class:`Request` object.
(...)
634 :rtype: requests.Response
635 """
--> 637 return self.request("POST", url, data=data, json=json, **kwargs)
File /opt/conda/lib/python3.10/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
584 send_kwargs = {
585 "timeout": timeout,
586 "allow_redirects": allow_redirects,
587 }
588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
591 return resp
File /opt/conda/lib/python3.10/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
700 start = preferred_clock()
702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
705 # Total elapsed time of the request (approximately)
706 elapsed = preferred_clock() - start
File /opt/conda/lib/python3.10/site-packages/requests/adapters.py:501, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
486 resp = conn.urlopen(
487 method=request.method,
488 url=url,
(...)
497 chunked=chunked,
498 )
500 except (ProtocolError, OSError) as err:
--> 501 raise ConnectionError(err, request=request)
503 except MaxRetryError as e:
504 if isinstance(e.reason, ConnectTimeoutError):
505 # TODO: Remove this in 3.0.0: see #2811
ConnectionError: ('Connection aborted.', LineTooLong('got more than 1048576 bytes when reading header line'))