From 78d45e733dfeebc0914a409ea2af646fcb6f1c61 Mon Sep 17 00:00:00 2001
From: Sam Bull <git@sambull.org>
Date: Fri, 16 Aug 2024 19:40:24 +0100
Subject: [PATCH] Fix Python parser when chunk separators align (#8720) (#8722)

(cherry picked from commit 6d3d1fcf2583eb7b8330b194c00356ce169b2ebd)
---
 CHANGES/8720.bugfix.rst   |  1 +
 aiohttp/http_parser.py    |  4 ++--
 tests/test_http_parser.py | 23 +++++++++++++++++++++++
 3 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 CHANGES/8720.bugfix.rst

diff --git a/CHANGES/8720.bugfix.rst b/CHANGES/8720.bugfix.rst
new file mode 100644
index 00000000000..9941be27530
--- /dev/null
+++ b/CHANGES/8720.bugfix.rst
@@ -0,0 +1 @@
+Fixed an edge case in the Python parser when chunk separators happen to align with network chunks -- by :user:`Dreamsorcerer`.
diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py
index 751a7e1bb73..b992955a011 100644
--- a/aiohttp/http_parser.py
+++ b/aiohttp/http_parser.py
@@ -870,13 +870,13 @@ def feed_data(
                         self._chunk_size = 0
                         self.payload.feed_data(chunk[:required], required)
                         chunk = chunk[required:]
-                        if self._lax and chunk.startswith(b"\r"):
-                            chunk = chunk[1:]
                         self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                         self.payload.end_http_chunk_receiving()
 
                 # toss the CRLF at the end of the chunk
                 if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
+                    if self._lax and chunk.startswith(b"\r"):
+                        chunk = chunk[1:]
                     if chunk[: len(SEP)] == SEP:
                         chunk = chunk[len(SEP) :]
                         self._chunk = ChunkState.PARSE_CHUNKED_SIZE
diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py
index 0e9aff68dc2..74700df4253 100644
--- a/tests/test_http_parser.py
+++ b/tests/test_http_parser.py
@@ -1410,6 +1410,29 @@ def test_parse_chunked_payload_empty_body_than_another_chunked(
     assert b"second" == b"".join(d for d in payload._buffer)
 
 
+async def test_parse_chunked_payload_split_chunks(response: Any) -> None:
+    network_chunks = (
+        b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n",
+        b"5\r\nfi",
+        b"rst",
+        # Regression case for lax mode: the \r\n separator that terminates an HTTP
+        # chunk arrives at the very start of the next network chunk.
+        b"\r\n",
+        b"6",
+        b"\r",
+        b"\n",
+        b"second\r",
+        b"\n0\r\n\r\n",
+    )
+    reader = response.feed_data(network_chunks[0])[0][0][1]
+    for c in network_chunks[1:]:
+        response.feed_data(c)
+
+    assert response.feed_eof() is None
+    assert reader.is_eof()
+    assert await reader.read() == b"firstsecond"
+
+
 def test_partial_url(parser: Any) -> None:
     messages, upgrade, tail = parser.feed_data(b"GET /te")
     assert len(messages) == 0