From beaccec7006fbf8612625d79959c42abb8129922 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Fri, 20 Sep 2024 00:50:11 +0200 Subject: [PATCH 01/12] Small speed up to filter_cookies noticed while working on #9203 TODO: show profiles --- aiohttp/cookiejar.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index 43e701bfe59..a335f7e830e 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -294,31 +294,28 @@ def filter_cookies(self, request_url: URL) -> "BaseCookie[str]": # Create every combination of (domain, path) pairs. pairs = itertools.product(domains, paths) - # Point 2: https://www.rfc-editor.org/rfc/rfc6265.html#section-5.4 - cookies = itertools.chain.from_iterable( - self._cookies[p].values() for p in pairs - ) path_len = len(request_url.path) - for cookie in cookies: - name = cookie.key - domain = cookie["domain"] + # Point 2: https://www.rfc-editor.org/rfc/rfc6265.html#section-5.4 + for p in pairs: + for name, cookie in self._cookies[p].items(): + domain = cookie["domain"] - if (domain, name) in self._host_only_cookies: - if domain != hostname: - continue + if (domain, name) in self._host_only_cookies: + if domain != hostname: + continue - # Skip edge case when the cookie has a trailing slash but request doesn't. - if len(cookie["path"]) > path_len: - continue + # Skip edge case when the cookie has a trailing slash but request doesn't. + if len(cookie["path"]) > path_len: + continue - if is_not_secure and cookie["secure"]: - continue + if is_not_secure and cookie["secure"]: + continue - # It's critical we use the Morsel so the coded_value - # (based on cookie version) is preserved - mrsl_val = cast("Morsel[str]", cookie.get(cookie.key, Morsel())) - mrsl_val.set(cookie.key, cookie.value, cookie.coded_value) - filtered[name] = mrsl_val + # It's critical we use the Morsel so the coded_value + # (based on cookie version) is preserved + mrsl_val = cast("Morsel[str]", cookie.get(cookie.key, Morsel())) + mrsl_val.set(cookie.key, cookie.value, cookie.coded_value) + filtered[name] = mrsl_val return filtered From 0961fe74368ef9894980b8473fe7af3cf5003b7b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Sep 2024 12:51:32 +0200 Subject: [PATCH 02/12] cache construction of morsels --- aiohttp/cookiejar.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index da5c64e06a2..64f92f4accd 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -94,6 +94,9 @@ def __init__( self._cookies: DefaultDict[Tuple[str, str], SimpleCookie] = defaultdict( SimpleCookie ) + self._morsel_cache: DefaultDict[Tuple[str, str], Dict[str, Morsel]] = ( + defaultdict(dict) + ) self._host_only_cookies: Set[Tuple[str, str]] = set() self._unsafe = unsafe self._quote_cookie = quote_cookie @@ -129,6 +132,7 @@ def clear(self, predicate: Optional[ClearCookiePredicate] = None) -> None: if predicate is None: self._expire_heap.clear() self._cookies.clear() + self._morsel_cache.clear() self._host_only_cookies.clear() self._expirations.clear() return @@ -210,6 +214,7 @@ def _delete_cookies(self, to_del: List[Tuple[str, str, str]]) -> None: for domain, path, name in to_del: self._host_only_cookies.discard((domain, name)) self._cookies[(domain, path)].pop(name, None) + self._morsel_cache[(domain, path)].pop(name, None) self._expirations.pop((domain, path, name), None) def _expire_cookie(self, when: float, domain: str, path: str, name: str) -> None: @@ -289,7 +294,9 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No else: cookie["expires"] = "" - self._cookies[(domain, path)][name] = cookie + key = (domain, path) + self._cookies[key][name] = cookie + self._morsel_cache[key].pop(name, None) self._do_expiration() @@ -358,10 +365,16 @@ def filter_cookies(self, request_url: URL) -> "BaseCookie[str]": if is_not_secure and cookie["secure"]: continue + # We already built the Morsel so reuse it here + if name in self._morsel_cache[p]: + filtered[name] = self._morsel_cache[p][name] + continue + # It's critical we use the Morsel so the coded_value # (based on cookie version) is preserved mrsl_val = cast("Morsel[str]", cookie.get(cookie.key, Morsel())) mrsl_val.set(cookie.key, cookie.value, cookie.coded_value) + self._morsel_cache[p][name] = mrsl_val filtered[name] = mrsl_val return filtered From 0f895d80c14fa17164e2645df74008ae56eb27c5 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Sep 2024 13:29:48 +0200 Subject: [PATCH 03/12] lint --- aiohttp/cookiejar.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index 64f92f4accd..cbd25cfd02c 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -354,9 +354,8 @@ def filter_cookies(self, request_url: URL) -> "BaseCookie[str]": for name, cookie in self._cookies[p].items(): domain = cookie["domain"] - if (domain, name) in self._host_only_cookies: - if domain != hostname: - continue + if (domain, name) in self._host_only_cookies and domain != hostname: + continue # Skip edge case when the cookie has a trailing slash but request doesn't. if len(cookie["path"]) > path_len: From abd69b8c1dcc9bb38b130509d28d2a2a66a9aae1 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Sep 2024 15:30:21 +0200 Subject: [PATCH 04/12] adjust --- aiohttp/cookiejar.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index cbd25cfd02c..c49694d1004 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -295,8 +295,11 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No cookie["expires"] = "" key = (domain, path) - self._cookies[key][name] = cookie - self._morsel_cache[key].pop(name, None) + if self._cookies[key].get(name) != cookie: + # Don't blow away the cache is the same + # cookie gets set again + self._cookies[key][name] = cookie + self._morsel_cache[key].pop(name, None) self._do_expiration() From 6fa901c3674193b9c663275b01dd852370547654 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Sep 2024 15:33:08 +0200 Subject: [PATCH 05/12] adjust --- aiohttp/cookiejar.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index c49694d1004..239fa6fa9ec 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -245,7 +245,7 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No domain = cookie["domain"] # ignore domains with trailing dots - if domain.endswith("."): + if domain and domain[-1] == ".": domain = "" del cookie["domain"] @@ -255,7 +255,7 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No self._host_only_cookies.add((hostname, name)) domain = cookie["domain"] = hostname - if domain.startswith("."): + if domain and domain[0] == ".": # Remove leading dot domain = domain[1:] cookie["domain"] = domain @@ -265,7 +265,7 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No continue path = cookie["path"] - if not path or not path.startswith("/"): + if not path or path[0] != "/": # Set the cookie's path to the response path path = response_url.path if not path.startswith("/"): From 0f396276a3558271f18996f9d11337dd8ca14dbc Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Sep 2024 15:34:02 +0200 Subject: [PATCH 06/12] adjust --- aiohttp/cookiejar.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index 239fa6fa9ec..ac087d02dd7 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -276,8 +276,7 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No cookie["path"] = path path = path.rstrip("/") - max_age = cookie["max-age"] - if max_age: + if max_age := cookie["max-age"]: try: delta_seconds = int(max_age) max_age_expiration = min(time.time() + delta_seconds, self.MAX_TIME) @@ -285,14 +284,12 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No except ValueError: cookie["max-age"] = "" - else: - expires = cookie["expires"] - if expires: - expire_time = self._parse_date(expires) - if expire_time: - self._expire_cookie(expire_time, domain, path, name) - else: - cookie["expires"] = "" + elif expires := cookie["expires"]: + expire_time = self._parse_date(expires) + if expire_time: + self._expire_cookie(expire_time, domain, path, name) + else: + cookie["expires"] = "" key = (domain, path) if self._cookies[key].get(name) != cookie: From a01813682fa1dd0563b0f2b83c3e47b4a291d7db Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Sep 2024 15:34:38 +0200 Subject: [PATCH 07/12] adjust --- aiohttp/cookiejar.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index ac087d02dd7..ba286f1f5fc 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -285,8 +285,7 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No cookie["max-age"] = "" elif expires := cookie["expires"]: - expire_time = self._parse_date(expires) - if expire_time: + if expire_time := self._parse_date(expires): self._expire_cookie(expire_time, domain, path, name) else: cookie["expires"] = "" From ad22d779da38e133bb4fc0d012becc0b20758c6d Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Sep 2024 15:41:31 -0400 Subject: [PATCH 08/12] typing --- aiohttp/cookiejar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index ba286f1f5fc..d4cf21e4ae7 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -94,7 +94,7 @@ def __init__( self._cookies: DefaultDict[Tuple[str, str], SimpleCookie] = defaultdict( SimpleCookie ) - self._morsel_cache: DefaultDict[Tuple[str, str], Dict[str, Morsel]] = ( + self._morsel_cache: DefaultDict[Tuple[str, str], Dict[str, Morsel[str]]] = ( defaultdict(dict) ) self._host_only_cookies: Set[Tuple[str, str]] = set() From 5b04975fb69c57ee9eb2e1d331540b0e35e1e56f Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Sep 2024 16:53:15 -0400 Subject: [PATCH 09/12] bypass all the extra checks since we are copying from cookies to cookies --- aiohttp/cookiejar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index d4cf21e4ae7..33692b3fd3b 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -365,7 +365,7 @@ def filter_cookies(self, request_url: URL) -> "BaseCookie[str]": # We already built the Morsel so reuse it here if name in self._morsel_cache[p]: - filtered[name] = self._morsel_cache[p][name] + dict.__setitem__(filtered, name, self._morsel_cache[p][name]) continue # It's critical we use the Morsel so the coded_value @@ -373,7 +373,7 @@ def filter_cookies(self, request_url: URL) -> "BaseCookie[str]": mrsl_val = cast("Morsel[str]", cookie.get(cookie.key, Morsel())) mrsl_val.set(cookie.key, cookie.value, cookie.coded_value) self._morsel_cache[p][name] = mrsl_val - filtered[name] = mrsl_val + dict.__setitem__(filtered, name, mrsl_val) return filtered From 32e435a529c8646b99e68f1f250b0bfa26bd5557 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 21 Sep 2024 17:08:11 -0400 Subject: [PATCH 10/12] Revert "bypass all the extra checks since we are copying from cookies to cookies" This reverts commit f496936d5e88091d689c070baa7638b9959e0939. --- aiohttp/cookiejar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index 33692b3fd3b..d4cf21e4ae7 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -365,7 +365,7 @@ def filter_cookies(self, request_url: URL) -> "BaseCookie[str]": # We already built the Morsel so reuse it here if name in self._morsel_cache[p]: - dict.__setitem__(filtered, name, self._morsel_cache[p][name]) + filtered[name] = self._morsel_cache[p][name] continue # It's critical we use the Morsel so the coded_value @@ -373,7 +373,7 @@ def filter_cookies(self, request_url: URL) -> "BaseCookie[str]": mrsl_val = cast("Morsel[str]", cookie.get(cookie.key, Morsel())) mrsl_val.set(cookie.key, cookie.value, cookie.coded_value) self._morsel_cache[p][name] = mrsl_val - dict.__setitem__(filtered, name, mrsl_val) + filtered[name] = mrsl_val return filtered From 734d84ba529969093a4c63bd1fdbc56d3b905788 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 22 Sep 2024 10:48:54 -0500 Subject: [PATCH 11/12] changelog --- CHANGES/9204.misc.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 CHANGES/9204.misc.rst diff --git a/CHANGES/9204.misc.rst b/CHANGES/9204.misc.rst new file mode 100644 index 00000000000..da12a7df6f7 --- /dev/null +++ b/CHANGES/9204.misc.rst @@ -0,0 +1 @@ +Significantly speed up filtering cookies -- by :user:`bdraco`. From efd02baf943231bc7a7fafc447d9506d4c2f2c97 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 22 Sep 2024 12:18:37 -0500 Subject: [PATCH 12/12] Update aiohttp/cookiejar.py --- aiohttp/cookiejar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index d4cf21e4ae7..85fd7716b56 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -292,7 +292,7 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No key = (domain, path) if self._cookies[key].get(name) != cookie: - # Don't blow away the cache is the same + # Don't blow away the cache if the same # cookie gets set again self._cookies[key][name] = cookie self._morsel_cache[key].pop(name, None)