From ac4f65a7c3a65fdfea35f08959fd2788f59b25bd Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Fri, 16 Aug 2024 19:10:56 +0100 Subject: [PATCH] Fix Python parser when chunk separators align (#8720) (cherry picked from commit 6d3d1fcf2583eb7b8330b194c00356ce169b2ebd) --- CHANGES/8720.bugfix.rst | 1 + aiohttp/http_parser.py | 4 ++-- tests/test_http_parser.py | 23 +++++++++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 CHANGES/8720.bugfix.rst diff --git a/CHANGES/8720.bugfix.rst b/CHANGES/8720.bugfix.rst new file mode 100644 index 00000000000..9941be27530 --- /dev/null +++ b/CHANGES/8720.bugfix.rst @@ -0,0 +1 @@ +Fixed an edge case in the Python parser when chunk separators happen to align with network chunks -- by :user:`Dreamsorcerer`. diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py index 751a7e1bb73..b992955a011 100644 --- a/aiohttp/http_parser.py +++ b/aiohttp/http_parser.py @@ -870,13 +870,13 @@ def feed_data( self._chunk_size = 0 self.payload.feed_data(chunk[:required], required) chunk = chunk[required:] - if self._lax and chunk.startswith(b"\r"): - chunk = chunk[1:] self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF self.payload.end_http_chunk_receiving() # toss the CRLF at the end of the chunk if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF: + if self._lax and chunk.startswith(b"\r"): + chunk = chunk[1:] if chunk[: len(SEP)] == SEP: chunk = chunk[len(SEP) :] self._chunk = ChunkState.PARSE_CHUNKED_SIZE diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py index 0e9aff68dc2..74700df4253 100644 --- a/tests/test_http_parser.py +++ b/tests/test_http_parser.py @@ -1410,6 +1410,29 @@ def test_parse_chunked_payload_empty_body_than_another_chunked( assert b"second" == b"".join(d for d in payload._buffer) +async def test_parse_chunked_payload_split_chunks(response: Any) -> None: + network_chunks = ( + b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n", + b"5\r\nfi", + b"rst", + # This simulates a bug in lax mode caused when the \r\n separator, before the + # next HTTP chunk, appears at the start of the next network chunk. + b"\r\n", + b"6", + b"\r", + b"\n", + b"second\r", + b"\n0\r\n\r\n", + ) + reader = response.feed_data(network_chunks[0])[0][0][1] + for c in network_chunks[1:]: + response.feed_data(c) + + assert response.feed_eof() is None + assert reader.is_eof() + assert await reader.read() == b"firstsecond" + + def test_partial_url(parser: Any) -> None: messages, upgrade, tail = parser.feed_data(b"GET /te") assert len(messages) == 0