diff --git a/Lib/http/client.py b/Lib/http/client.py index 04cd8f7d849861..b756f607d344ef 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -204,15 +204,11 @@ def getallmatchingheaders(self, name): lst.append(line) return lst -def parse_headers(fp, _class=HTTPMessage): - """Parses only RFC2822 headers from a file pointer. - - email Parser wants to see strings rather than bytes. - But a TextIOWrapper around self.rfile would buffer too many bytes - from the stream, bytes which we later need to read as bytes. - So we read the correct bytes here, as bytes, for email Parser - to parse. +def _read_headers(fp): + """Reads potential header lines into a list from a file pointer. + Length of line is limited by _MAXLINE, and number of + headers is limited by _MAXHEADERS. """ headers = [] while True: @@ -224,6 +220,19 @@ def parse_headers(fp, _class=HTTPMessage): raise HTTPException("got more than %d headers" % _MAXHEADERS) if line in (b'\r\n', b'\n', b''): break + return headers + +def parse_headers(fp, _class=HTTPMessage): + """Parses only RFC2822 headers from a file pointer. + + email Parser wants to see strings rather than bytes. + But a TextIOWrapper around self.rfile would buffer too many bytes + from the stream, bytes which we later need to read as bytes. + So we read the correct bytes here, as bytes, for email Parser + to parse. + + """ + headers = _read_headers(fp) hstring = b''.join(headers).decode('iso-8859-1') return email.parser.Parser(_class=_class).parsestr(hstring) @@ -311,15 +320,10 @@ def begin(self): if status != CONTINUE: break # skip the header from the 100 response - while True: - skip = self.fp.readline(_MAXLINE + 1) - if len(skip) > _MAXLINE: - raise LineTooLong("header line") - skip = skip.strip() - if not skip: - break - if self.debuglevel > 0: - print("header:", skip) + skipped_headers = _read_headers(self.fp) + if self.debuglevel > 0: + print("headers:", skipped_headers) + del skipped_headers self.code = self.status = status self.reason = reason.strip() diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 3fa0691d3ad8f2..8333aa0eeef6ac 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -998,6 +998,14 @@ def test_overflowing_header_line(self): resp = client.HTTPResponse(FakeSocket(body)) self.assertRaises(client.LineTooLong, resp.begin) + def test_overflowing_header_limit_after_100(self): + body = ( + 'HTTP/1.1 100 OK\r\n' + 'r\n' * 32768 + ) + resp = client.HTTPResponse(FakeSocket(body)) + self.assertRaises(client.HTTPException, resp.begin) + def test_overflowing_chunked_line(self): body = ( 'HTTP/1.1 200 OK\r\n' @@ -1402,7 +1410,7 @@ def readline(self, limit): class OfflineTest(TestCase): def test_all(self): # Documented objects defined in the module should be in __all__ - expected = {"responses"} # White-list documented dict() object + expected = {"responses"} # Allowlist documented dict() object # HTTPMessage, parse_headers(), and the HTTP status code constants are # intentionally omitted for simplicity blacklist = {"HTTPMessage", "parse_headers"} diff --git a/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst b/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst new file mode 100644 index 00000000000000..cf6b63e3961558 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2021-05-05-17-37-04.bpo-44022.bS3XJ9.rst @@ -0,0 +1,2 @@ +mod:`http.client` now avoids infinitely reading potential HTTP headers after a +``100 Continue`` status response from the server.