From c1ead0aa37d5f3526820fcbec1c89011c5063236 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 23 May 2022 11:14:16 -0400 Subject: [PATCH 01/17] Switch to new cache format and new cache location. --- news/2984.bugfix | 1 + src/pip/_internal/cli/req_command.py | 2 +- src/pip/_internal/commands/cache.py | 21 ++++++++++++------ src/pip/_internal/network/cache.py | 32 +++++++++++++++++++++------- tests/functional/test_cache.py | 4 ++-- 5 files changed, 43 insertions(+), 17 deletions(-) create mode 100644 news/2984.bugfix diff --git a/news/2984.bugfix b/news/2984.bugfix new file mode 100644 index 00000000000..d75974349ed --- /dev/null +++ b/news/2984.bugfix @@ -0,0 +1 @@ +pip uses less memory when caching large packages. As a result, there is a new on-disk cache format stored in a new directory ($PIP_CACHE_DIR/http-v2). \ No newline at end of file diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py index c2f4e38bed8..c9c2019591e 100644 --- a/src/pip/_internal/cli/req_command.py +++ b/src/pip/_internal/cli/req_command.py @@ -123,7 +123,7 @@ def _build_session( ssl_context = None session = PipSession( - cache=os.path.join(cache_dir, "http") if cache_dir else None, + cache=os.path.join(cache_dir, "http-v2") if cache_dir else None, retries=retries if retries is not None else options.retries, trusted_hosts=options.trusted_hosts, index_urls=self._get_index_urls(options), diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py index e96d2b4924c..a11e151f3c8 100644 --- a/src/pip/_internal/commands/cache.py +++ b/src/pip/_internal/commands/cache.py @@ -93,17 +93,21 @@ def get_cache_info(self, options: Values, args: List[Any]) -> None: num_http_files = len(self._find_http_files(options)) num_packages = len(self._find_wheels(options, "*")) - http_cache_location = self._cache_dir(options, "http") + http_cache_location = self._cache_dir(options, "http-v2") + old_http_cache_location = self._cache_dir(options, "http") wheels_cache_location = self._cache_dir(options, "wheels") http_cache_size = filesystem.format_directory_size(http_cache_location) + old_http_cache_size = filesystem.format_directory_size(old_http_cache_location) wheels_cache_size = filesystem.format_directory_size(wheels_cache_location) message = ( textwrap.dedent( """ - Package index page cache location: {http_cache_location} - Package index page cache size: {http_cache_size} - Number of HTTP files: {num_http_files} + Package index page cache location (new): {http_cache_location} + Package index page cache location (old): {old_http_cache_location} + Package index page cache size (new): {http_cache_size} + Package index page cache size (old): {old_http_cache_size} + Number of HTTP files (old+new cache): {num_http_files} Locally built wheels location: {wheels_cache_location} Locally built wheels size: {wheels_cache_size} Number of locally built wheels: {package_count} @@ -111,7 +115,9 @@ def get_cache_info(self, options: Values, args: List[Any]) -> None: ) .format( http_cache_location=http_cache_location, + old_http_cache_location=old_http_cache_location, http_cache_size=http_cache_size, + old_http_cache_size=old_http_cache_size, num_http_files=num_http_files, wheels_cache_location=wheels_cache_location, package_count=num_packages, @@ -195,8 +201,11 @@ def _cache_dir(self, options: Values, subdir: str) -> str: return os.path.join(options.cache_dir, subdir) def _find_http_files(self, options: Values) -> List[str]: - http_dir = self._cache_dir(options, "http") - return filesystem.find_files(http_dir, "*") + old_http_dir = self._cache_dir(options, "http") + new_http_dir = self._cache_dir(options, "http-v2") + return filesystem.find_files(old_http_dir, "*") + filesystem.find_files( + new_http_dir, "*" + ) def _find_wheels(self, options: Values, pattern: str) -> List[str]: wheel_dir = self._cache_dir(options, "wheels") diff --git a/src/pip/_internal/network/cache.py b/src/pip/_internal/network/cache.py index a81a2398519..b85be2e487b 100644 --- a/src/pip/_internal/network/cache.py +++ b/src/pip/_internal/network/cache.py @@ -3,10 +3,10 @@ import os from contextlib import contextmanager -from typing import Generator, Optional +from typing import BinaryIO, Generator, Optional -from pip._vendor.cachecontrol.cache import BaseCache -from pip._vendor.cachecontrol.caches import FileCache +from pip._vendor.cachecontrol.cache import SeparateBodyBaseCache +from pip._vendor.cachecontrol.caches import SeparateBodyFileCache from pip._vendor.requests.models import Response from pip._internal.utils.filesystem import adjacent_tmp_file, replace @@ -28,7 +28,7 @@ def suppressed_cache_errors() -> Generator[None, None, None]: pass -class SafeFileCache(BaseCache): +class SafeFileCache(SeparateBodyBaseCache): """ A file based cache which is safe to use even when the target directory may not be accessible or writable. @@ -43,7 +43,7 @@ def _get_cache_path(self, name: str) -> str: # From cachecontrol.caches.file_cache.FileCache._fn, brought into our # class for backwards-compatibility and to avoid using a non-public # method. - hashed = FileCache.encode(name) + hashed = SeparateBodyFileCache.encode(name) parts = list(hashed[:5]) + [hashed] return os.path.join(self.directory, *parts) @@ -53,17 +53,33 @@ def get(self, key: str) -> Optional[bytes]: with open(path, "rb") as f: return f.read() - def set(self, key: str, value: bytes, expires: Optional[int] = None) -> None: - path = self._get_cache_path(key) + def _write(self, path: str, data: bytes) -> None: with suppressed_cache_errors(): ensure_dir(os.path.dirname(path)) with adjacent_tmp_file(path) as f: - f.write(value) + f.write(data) replace(f.name, path) + def set(self, key: str, value: bytes, expires: Optional[int] = None) -> None: + path = self._get_cache_path(key) + self._write(path, value) + def delete(self, key: str) -> None: path = self._get_cache_path(key) with suppressed_cache_errors(): os.remove(path) + os.remove(path + ".body") + + def get_body(self, key: str) -> Optional[BinaryIO]: + path = self._get_cache_path(key) + ".body" + with suppressed_cache_errors(): + return open(path, "rb") + + def set_body(self, key: str, body: Optional[bytes]) -> None: + if body is None: + # Workaround for https://github.com/ionrock/cachecontrol/issues/276 + return + path = self._get_cache_path(key) + ".body" + self._write(path, body) diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py index 788abdd2be5..5eea6a96e99 100644 --- a/tests/functional/test_cache.py +++ b/tests/functional/test_cache.py @@ -20,7 +20,7 @@ def cache_dir(script: PipTestEnvironment) -> str: @pytest.fixture def http_cache_dir(cache_dir: str) -> str: - return os.path.normcase(os.path.join(cache_dir, "http")) + return os.path.normcase(os.path.join(cache_dir, "http-v2")) @pytest.fixture @@ -211,7 +211,7 @@ def test_cache_info( ) -> None: result = script.pip("cache", "info") - assert f"Package index page cache location: {http_cache_dir}" in result.stdout + assert f"Package index page cache location (new): {http_cache_dir}" in result.stdout assert f"Locally built wheels location: {wheel_cache_dir}" in result.stdout num_wheels = len(wheel_cache_files) assert f"Number of locally built wheels: {num_wheels}" in result.stdout From fa87c9eb23dd25ad5cb03fe480a3fc4b92deb7a6 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 23 May 2022 13:06:38 -0400 Subject: [PATCH 02/17] Testing for body methods of network cache. --- src/pip/_internal/network/cache.py | 1 + tests/unit/test_network_cache.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/src/pip/_internal/network/cache.py b/src/pip/_internal/network/cache.py index b85be2e487b..11c76bf0f93 100644 --- a/src/pip/_internal/network/cache.py +++ b/src/pip/_internal/network/cache.py @@ -70,6 +70,7 @@ def delete(self, key: str) -> None: path = self._get_cache_path(key) with suppressed_cache_errors(): os.remove(path) + with suppressed_cache_errors(): os.remove(path + ".body") def get_body(self, key: str) -> Optional[BinaryIO]: diff --git a/tests/unit/test_network_cache.py b/tests/unit/test_network_cache.py index a5519864f4c..88597e4c186 100644 --- a/tests/unit/test_network_cache.py +++ b/tests/unit/test_network_cache.py @@ -31,6 +31,14 @@ def test_cache_roundtrip(self, cache_tmpdir: Path) -> None: cache.delete("test key") assert cache.get("test key") is None + def test_cache_roundtrip_body(self, cache_tmpdir: Path) -> None: + cache = SafeFileCache(os.fspath(cache_tmpdir)) + assert cache.get_body("test key") is None + cache.set_body("test key", b"a test string") + assert cache.get_body("test key").read() == b"a test string" + cache.delete("test key") + assert cache.get_body("test key") is None + @pytest.mark.skipif("sys.platform == 'win32'") def test_safe_get_no_perms( self, cache_tmpdir: Path, monkeypatch: pytest.MonkeyPatch From fde34fdf8416a9692c07a899d2668f3f6ccf9df7 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 24 May 2022 12:06:15 -0400 Subject: [PATCH 03/17] Temporary workaround for https://github.com/ionrock/cachecontrol/issues/276 until it's fixed upstream. --- src/pip/_vendor/cachecontrol/controller.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/pip/_vendor/cachecontrol/controller.py b/src/pip/_vendor/cachecontrol/controller.py index 7f23529f115..14ba629768c 100644 --- a/src/pip/_vendor/cachecontrol/controller.py +++ b/src/pip/_vendor/cachecontrol/controller.py @@ -407,7 +407,17 @@ def update_cached_response(self, request, response): """ cache_url = self.cache_url(request.url) - cached_response = self.serializer.loads(request, self.cache.get(cache_url)) + # NOTE: This is a hot-patch for + # https://github.com/ionrock/cachecontrol/issues/276 until it's fixed + # upstream. + if isinstance(self.cache, SeparateBodyBaseCache): + body_file = self.cache.get_body(cache_url) + else: + body_file = None + + cached_response = self.serializer.loads( + request, self.cache.get(cache_url), body_file + ) if not cached_response: # we didn't have a cached response From 5b7c999581e1b892a8048f6bd1275e8501614911 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 24 May 2022 12:10:05 -0400 Subject: [PATCH 04/17] Whitespace fix. --- news/2984.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/news/2984.bugfix b/news/2984.bugfix index d75974349ed..cce561815c9 100644 --- a/news/2984.bugfix +++ b/news/2984.bugfix @@ -1 +1 @@ -pip uses less memory when caching large packages. As a result, there is a new on-disk cache format stored in a new directory ($PIP_CACHE_DIR/http-v2). \ No newline at end of file +pip uses less memory when caching large packages. As a result, there is a new on-disk cache format stored in a new directory ($PIP_CACHE_DIR/http-v2). From 7a609bfdd5a23d404124a0ace5e3598966fe2466 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 24 May 2022 12:11:34 -0400 Subject: [PATCH 05/17] Mypy fix. --- tests/unit/test_network_cache.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_network_cache.py b/tests/unit/test_network_cache.py index 88597e4c186..d62d1ab696c 100644 --- a/tests/unit/test_network_cache.py +++ b/tests/unit/test_network_cache.py @@ -35,7 +35,9 @@ def test_cache_roundtrip_body(self, cache_tmpdir: Path) -> None: cache = SafeFileCache(os.fspath(cache_tmpdir)) assert cache.get_body("test key") is None cache.set_body("test key", b"a test string") - assert cache.get_body("test key").read() == b"a test string" + body = cache.get_body("test key") + assert body is not None + assert body.read() == b"a test string" cache.delete("test key") assert cache.get_body("test key") is None From 3dbba12132b55a937c095c1c5537baf8652533ad Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 24 May 2022 12:12:29 -0400 Subject: [PATCH 06/17] Correct name. --- news/{2984.bugfix => 2984.bugfix.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename news/{2984.bugfix => 2984.bugfix.rst} (100%) diff --git a/news/2984.bugfix b/news/2984.bugfix.rst similarity index 100% rename from news/2984.bugfix rename to news/2984.bugfix.rst From bff05e5622b1dcf66c1556fb421441086b93456c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 24 May 2022 14:50:29 -0400 Subject: [PATCH 07/17] Switch to proposed upstream fix. --- src/pip/_internal/network/cache.py | 3 -- src/pip/_vendor/cachecontrol/controller.py | 57 +++++++++++----------- 2 files changed, 28 insertions(+), 32 deletions(-) diff --git a/src/pip/_internal/network/cache.py b/src/pip/_internal/network/cache.py index 11c76bf0f93..f52e9974fc2 100644 --- a/src/pip/_internal/network/cache.py +++ b/src/pip/_internal/network/cache.py @@ -79,8 +79,5 @@ def get_body(self, key: str) -> Optional[BinaryIO]: return open(path, "rb") def set_body(self, key: str, body: Optional[bytes]) -> None: - if body is None: - # Workaround for https://github.com/ionrock/cachecontrol/issues/276 - return path = self._get_cache_path(key) + ".body" self._write(path, body) diff --git a/src/pip/_vendor/cachecontrol/controller.py b/src/pip/_vendor/cachecontrol/controller.py index 14ba629768c..7af0e002da0 100644 --- a/src/pip/_vendor/cachecontrol/controller.py +++ b/src/pip/_vendor/cachecontrol/controller.py @@ -122,6 +122,26 @@ def parse_cache_control(self, headers): return retval + def _load_from_cache(self, request): + """ + Load a cached response, or return None if it's not available. + """ + cache_url = request.url + cache_data = self.cache.get(cache_url) + if cache_data is None: + logger.debug("No cache entry available") + return None + + if isinstance(self.cache, SeparateBodyBaseCache): + body_file = self.cache.get_body(cache_url) + else: + body_file = None + + result = self.serializer.loads(request, cache_data, body_file) + if result is None: + logger.warning("Cache entry deserialization failed, entry ignored") + return result + def cached_request(self, request): """ Return a cached response if it exists in the cache, otherwise @@ -140,21 +160,9 @@ def cached_request(self, request): logger.debug('Request header has "max_age" as 0, cache bypassed') return False - # Request allows serving from the cache, let's see if we find something - cache_data = self.cache.get(cache_url) - if cache_data is None: - logger.debug("No cache entry available") - return False - - if isinstance(self.cache, SeparateBodyBaseCache): - body_file = self.cache.get_body(cache_url) - else: - body_file = None - - # Check whether it can be deserialized - resp = self.serializer.loads(request, cache_data, body_file) + # Check whether we can load the response from the cache: + resp = self._load_from_cache(request) if not resp: - logger.warning("Cache entry deserialization failed, entry ignored") return False # If we have a cached permanent redirect, return it immediately. We @@ -240,8 +248,7 @@ def cached_request(self, request): return False def conditional_headers(self, request): - cache_url = self.cache_url(request.url) - resp = self.serializer.loads(request, self.cache.get(cache_url)) + resp = self._load_from_cache(request) new_headers = {} if resp: @@ -267,7 +274,10 @@ def _cache_set(self, cache_url, request, response, body=None, expires_time=None) self.serializer.dumps(request, response, b""), expires=expires_time, ) - self.cache.set_body(cache_url, body) + # body is None can happen when, for example, we're only updating + # headers, as is the case in update_cached_response(). + if body is not None: + self.cache.set_body(cache_url, body) else: self.cache.set( cache_url, @@ -406,18 +416,7 @@ def update_cached_response(self, request, response): gotten a 304 as the response. """ cache_url = self.cache_url(request.url) - - # NOTE: This is a hot-patch for - # https://github.com/ionrock/cachecontrol/issues/276 until it's fixed - # upstream. - if isinstance(self.cache, SeparateBodyBaseCache): - body_file = self.cache.get_body(cache_url) - else: - body_file = None - - cached_response = self.serializer.loads( - request, self.cache.get(cache_url), body_file - ) + cached_response = self._load_from_cache(request) if not cached_response: # we didn't have a cached response From 46f9154daecffa52966f6e917f0819cfabb112ad Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 24 May 2022 15:11:16 -0400 Subject: [PATCH 08/17] Make sure the file gets closed. --- tests/unit/test_network_cache.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_network_cache.py b/tests/unit/test_network_cache.py index d62d1ab696c..aa849f3b03a 100644 --- a/tests/unit/test_network_cache.py +++ b/tests/unit/test_network_cache.py @@ -37,7 +37,8 @@ def test_cache_roundtrip_body(self, cache_tmpdir: Path) -> None: cache.set_body("test key", b"a test string") body = cache.get_body("test key") assert body is not None - assert body.read() == b"a test string" + with body: + assert body.read() == b"a test string" cache.delete("test key") assert cache.get_body("test key") is None From bada6316dfcb16d50f214b88f8d2424f0e9d990b Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 24 May 2022 15:13:46 -0400 Subject: [PATCH 09/17] More accurate type. --- src/pip/_internal/network/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pip/_internal/network/cache.py b/src/pip/_internal/network/cache.py index f52e9974fc2..d6b8ccdcf36 100644 --- a/src/pip/_internal/network/cache.py +++ b/src/pip/_internal/network/cache.py @@ -78,6 +78,6 @@ def get_body(self, key: str) -> Optional[BinaryIO]: with suppressed_cache_errors(): return open(path, "rb") - def set_body(self, key: str, body: Optional[bytes]) -> None: + def set_body(self, key: str, body: bytes) -> None: path = self._get_cache_path(key) + ".body" self._write(path, body) From ca08c16b9e81ce21021831fb1bdfe3a76387fd25 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 2 Jun 2023 13:56:56 -0400 Subject: [PATCH 10/17] Vendor latest version of CacheControl. --- src/pip/_vendor/cachecontrol.pyi | 1 - src/pip/_vendor/cachecontrol/__init__.py | 18 ++- src/pip/_vendor/cachecontrol/_cmd.py | 24 ++- src/pip/_vendor/cachecontrol/adapter.py | 83 +++++++---- src/pip/_vendor/cachecontrol/cache.py | 31 ++-- .../_vendor/cachecontrol/caches/__init__.py | 5 +- .../_vendor/cachecontrol/caches/file_cache.py | 78 +++++----- .../cachecontrol/caches/redis_cache.py | 29 ++-- src/pip/_vendor/cachecontrol/compat.py | 32 ---- src/pip/_vendor/cachecontrol/controller.py | 116 ++++++++++----- src/pip/_vendor/cachecontrol/filewrapper.py | 27 ++-- src/pip/_vendor/cachecontrol/heuristics.py | 54 ++++--- src/pip/_vendor/cachecontrol/py.typed | 0 src/pip/_vendor/cachecontrol/serialize.py | 139 ++++++++++++------ src/pip/_vendor/cachecontrol/wrapper.py | 33 +++-- src/pip/_vendor/vendor.txt | 2 +- 16 files changed, 407 insertions(+), 265 deletions(-) delete mode 100644 src/pip/_vendor/cachecontrol.pyi delete mode 100644 src/pip/_vendor/cachecontrol/compat.py create mode 100644 src/pip/_vendor/cachecontrol/py.typed diff --git a/src/pip/_vendor/cachecontrol.pyi b/src/pip/_vendor/cachecontrol.pyi deleted file mode 100644 index 636a66bacaf..00000000000 --- a/src/pip/_vendor/cachecontrol.pyi +++ /dev/null @@ -1 +0,0 @@ -from cachecontrol import * \ No newline at end of file diff --git a/src/pip/_vendor/cachecontrol/__init__.py b/src/pip/_vendor/cachecontrol/__init__.py index f631ae6df47..3701cdd6be8 100644 --- a/src/pip/_vendor/cachecontrol/__init__.py +++ b/src/pip/_vendor/cachecontrol/__init__.py @@ -8,11 +8,21 @@ """ __author__ = "Eric Larson" __email__ = "eric@ionrock.org" -__version__ = "0.12.11" +__version__ = "0.13.0" -from .wrapper import CacheControl -from .adapter import CacheControlAdapter -from .controller import CacheController +from pip._vendor.cachecontrol.adapter import CacheControlAdapter +from pip._vendor.cachecontrol.controller import CacheController +from pip._vendor.cachecontrol.wrapper import CacheControl + +__all__ = [ + "__author__", + "__email__", + "__version__", + "CacheControlAdapter", + "CacheController", + "CacheControl", +] import logging + logging.getLogger(__name__).addHandler(logging.NullHandler()) diff --git a/src/pip/_vendor/cachecontrol/_cmd.py b/src/pip/_vendor/cachecontrol/_cmd.py index 4266b5ee92a..ab4dac3dde1 100644 --- a/src/pip/_vendor/cachecontrol/_cmd.py +++ b/src/pip/_vendor/cachecontrol/_cmd.py @@ -3,6 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 import logging +from argparse import ArgumentParser +from typing import TYPE_CHECKING from pip._vendor import requests @@ -10,16 +12,19 @@ from pip._vendor.cachecontrol.cache import DictCache from pip._vendor.cachecontrol.controller import logger -from argparse import ArgumentParser +if TYPE_CHECKING: + from argparse import Namespace + from pip._vendor.cachecontrol.controller import CacheController -def setup_logging(): + +def setup_logging() -> None: logger.setLevel(logging.DEBUG) handler = logging.StreamHandler() logger.addHandler(handler) -def get_session(): +def get_session() -> requests.Session: adapter = CacheControlAdapter( DictCache(), cache_etags=True, serializer=None, heuristic=None ) @@ -27,17 +32,17 @@ def get_session(): sess.mount("http://", adapter) sess.mount("https://", adapter) - sess.cache_controller = adapter.controller + sess.cache_controller = adapter.controller # type: ignore[attr-defined] return sess -def get_args(): +def get_args() -> "Namespace": parser = ArgumentParser() parser.add_argument("url", help="The URL to try and cache") return parser.parse_args() -def main(args=None): +def main() -> None: args = get_args() sess = get_session() @@ -48,10 +53,13 @@ def main(args=None): setup_logging() # try setting the cache - sess.cache_controller.cache_response(resp.request, resp.raw) + cache_controller: "CacheController" = ( + sess.cache_controller # type: ignore[attr-defined] + ) + cache_controller.cache_response(resp.request, resp.raw) # Now try to get it - if sess.cache_controller.cached_request(resp.request): + if cache_controller.cached_request(resp.request): print("Cached!") else: print("Not cached :(") diff --git a/src/pip/_vendor/cachecontrol/adapter.py b/src/pip/_vendor/cachecontrol/adapter.py index 94c75e1a05b..83c08e003fe 100644 --- a/src/pip/_vendor/cachecontrol/adapter.py +++ b/src/pip/_vendor/cachecontrol/adapter.py @@ -2,15 +2,24 @@ # # SPDX-License-Identifier: Apache-2.0 -import types import functools +import types import zlib +from typing import TYPE_CHECKING, Any, Collection, Mapping, Optional, Tuple, Type, Union from pip._vendor.requests.adapters import HTTPAdapter -from .controller import CacheController, PERMANENT_REDIRECT_STATUSES -from .cache import DictCache -from .filewrapper import CallbackFileWrapper +from pip._vendor.cachecontrol.cache import DictCache +from pip._vendor.cachecontrol.controller import PERMANENT_REDIRECT_STATUSES, CacheController +from pip._vendor.cachecontrol.filewrapper import CallbackFileWrapper + +if TYPE_CHECKING: + from pip._vendor.requests import PreparedRequest, Response + from pip._vendor.urllib3 import HTTPResponse + + from pip._vendor.cachecontrol.cache import BaseCache + from pip._vendor.cachecontrol.heuristics import BaseHeuristic + from pip._vendor.cachecontrol.serialize import Serializer class CacheControlAdapter(HTTPAdapter): @@ -18,15 +27,15 @@ class CacheControlAdapter(HTTPAdapter): def __init__( self, - cache=None, - cache_etags=True, - controller_class=None, - serializer=None, - heuristic=None, - cacheable_methods=None, - *args, - **kw - ): + cache: Optional["BaseCache"] = None, + cache_etags: bool = True, + controller_class: Optional[Type[CacheController]] = None, + serializer: Optional["Serializer"] = None, + heuristic: Optional["BaseHeuristic"] = None, + cacheable_methods: Optional[Collection[str]] = None, + *args: Any, + **kw: Any, + ) -> None: super(CacheControlAdapter, self).__init__(*args, **kw) self.cache = DictCache() if cache is None else cache self.heuristic = heuristic @@ -37,7 +46,18 @@ def __init__( self.cache, cache_etags=cache_etags, serializer=serializer ) - def send(self, request, cacheable_methods=None, **kw): + def send( + self, + request: "PreparedRequest", + stream: bool = False, + timeout: Union[None, float, Tuple[float, float], Tuple[float, None]] = None, + verify: Union[bool, str] = True, + cert: Union[ + None, bytes, str, Tuple[Union[bytes, str], Union[bytes, str]] + ] = None, + proxies: Optional[Mapping[str, str]] = None, + cacheable_methods: Optional[Collection[str]] = None, + ) -> "Response": """ Send a request. Use the request information to see if it exists in the cache and cache the response if we need to and can. @@ -54,13 +74,19 @@ def send(self, request, cacheable_methods=None, **kw): # check for etags and add headers if appropriate request.headers.update(self.controller.conditional_headers(request)) - resp = super(CacheControlAdapter, self).send(request, **kw) + resp = super(CacheControlAdapter, self).send( + request, stream, timeout, verify, cert, proxies + ) return resp def build_response( - self, request, response, from_cache=False, cacheable_methods=None - ): + self, + request: "PreparedRequest", + response: "HTTPResponse", + from_cache: bool = False, + cacheable_methods: Optional[Collection[str]] = None, + ) -> "Response": """ Build a response by making a request or using the cache. @@ -102,36 +128,39 @@ def build_response( else: # Wrap the response file with a wrapper that will cache the # response when the stream has been consumed. - response._fp = CallbackFileWrapper( - response._fp, + response._fp = CallbackFileWrapper( # type: ignore[attr-defined] + response._fp, # type: ignore[attr-defined] functools.partial( self.controller.cache_response, request, response ), ) if response.chunked: - super_update_chunk_length = response._update_chunk_length + super_update_chunk_length = response._update_chunk_length # type: ignore[attr-defined] - def _update_chunk_length(self): + def _update_chunk_length(self: "HTTPResponse") -> None: super_update_chunk_length() if self.chunk_left == 0: - self._fp._close() + self._fp._close() # type: ignore[attr-defined] - response._update_chunk_length = types.MethodType( + response._update_chunk_length = types.MethodType( # type: ignore[attr-defined] _update_chunk_length, response ) - resp = super(CacheControlAdapter, self).build_response(request, response) + resp: "Response" = super( # type: ignore[no-untyped-call] + CacheControlAdapter, self + ).build_response(request, response) # See if we should invalidate the cache. if request.method in self.invalidating_methods and resp.ok: + assert request.url is not None cache_url = self.controller.cache_url(request.url) self.cache.delete(cache_url) # Give the request a from_cache attr to let people use it - resp.from_cache = from_cache + resp.from_cache = from_cache # type: ignore[attr-defined] return resp - def close(self): + def close(self) -> None: self.cache.close() - super(CacheControlAdapter, self).close() + super(CacheControlAdapter, self).close() # type: ignore[no-untyped-call] diff --git a/src/pip/_vendor/cachecontrol/cache.py b/src/pip/_vendor/cachecontrol/cache.py index 2a965f595ff..61031d23441 100644 --- a/src/pip/_vendor/cachecontrol/cache.py +++ b/src/pip/_vendor/cachecontrol/cache.py @@ -7,37 +7,43 @@ safe in-memory dictionary. """ from threading import Lock +from typing import IO, TYPE_CHECKING, MutableMapping, Optional, Union +if TYPE_CHECKING: + from datetime import datetime -class BaseCache(object): - def get(self, key): +class BaseCache(object): + def get(self, key: str) -> Optional[bytes]: raise NotImplementedError() - def set(self, key, value, expires=None): + def set( + self, key: str, value: bytes, expires: Optional[Union[int, "datetime"]] = None + ) -> None: raise NotImplementedError() - def delete(self, key): + def delete(self, key: str) -> None: raise NotImplementedError() - def close(self): + def close(self) -> None: pass class DictCache(BaseCache): - - def __init__(self, init_dict=None): + def __init__(self, init_dict: Optional[MutableMapping[str, bytes]] = None) -> None: self.lock = Lock() self.data = init_dict or {} - def get(self, key): + def get(self, key: str) -> Optional[bytes]: return self.data.get(key, None) - def set(self, key, value, expires=None): + def set( + self, key: str, value: bytes, expires: Optional[Union[int, "datetime"]] = None + ) -> None: with self.lock: self.data.update({key: value}) - def delete(self, key): + def delete(self, key: str) -> None: with self.lock: if key in self.data: self.data.pop(key) @@ -55,10 +61,11 @@ class SeparateBodyBaseCache(BaseCache): Similarly, the body should be loaded separately via ``get_body()``. """ - def set_body(self, key, body): + + def set_body(self, key: str, body: bytes) -> None: raise NotImplementedError() - def get_body(self, key): + def get_body(self, key: str) -> Optional["IO[bytes]"]: """ Return the body as file-like object. """ diff --git a/src/pip/_vendor/cachecontrol/caches/__init__.py b/src/pip/_vendor/cachecontrol/caches/__init__.py index 37827291fb5..24ff469ff98 100644 --- a/src/pip/_vendor/cachecontrol/caches/__init__.py +++ b/src/pip/_vendor/cachecontrol/caches/__init__.py @@ -2,8 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -from .file_cache import FileCache, SeparateBodyFileCache -from .redis_cache import RedisCache - +from pip._vendor.cachecontrol.caches.file_cache import FileCache, SeparateBodyFileCache +from pip._vendor.cachecontrol.caches.redis_cache import RedisCache __all__ = ["FileCache", "SeparateBodyFileCache", "RedisCache"] diff --git a/src/pip/_vendor/cachecontrol/caches/file_cache.py b/src/pip/_vendor/cachecontrol/caches/file_cache.py index f1ddb2ebdf9..0437c4e8a13 100644 --- a/src/pip/_vendor/cachecontrol/caches/file_cache.py +++ b/src/pip/_vendor/cachecontrol/caches/file_cache.py @@ -5,18 +5,18 @@ import hashlib import os from textwrap import dedent +from typing import IO, TYPE_CHECKING, Optional, Type, Union -from ..cache import BaseCache, SeparateBodyBaseCache -from ..controller import CacheController +from pip._vendor.cachecontrol.cache import BaseCache, SeparateBodyBaseCache +from pip._vendor.cachecontrol.controller import CacheController -try: - FileNotFoundError -except NameError: - # py2.X - FileNotFoundError = (IOError, OSError) +if TYPE_CHECKING: + from datetime import datetime + from filelock import BaseFileLock -def _secure_open_write(filename, fmode): + +def _secure_open_write(filename: str, fmode: int) -> "IO[bytes]": # We only want to write to this file, so open it in write only mode flags = os.O_WRONLY @@ -62,37 +62,27 @@ class _FileCacheMixin: def __init__( self, - directory, - forever=False, - filemode=0o0600, - dirmode=0o0700, - use_dir_lock=None, - lock_class=None, - ): - - if use_dir_lock is not None and lock_class is not None: - raise ValueError("Cannot use use_dir_lock and lock_class together") - + directory: str, + forever: bool = False, + filemode: int = 0o0600, + dirmode: int = 0o0700, + lock_class: Optional[Type["BaseFileLock"]] = None, + ) -> None: try: - from lockfile import LockFile - from lockfile.mkdirlockfile import MkdirLockFile + if lock_class is None: + from filelock import FileLock + + lock_class = FileLock except ImportError: notice = dedent( """ NOTE: In order to use the FileCache you must have - lockfile installed. You can install it via pip: - pip install lockfile + filelock installed. You can install it via pip: + pip install filelock """ ) raise ImportError(notice) - else: - if use_dir_lock: - lock_class = MkdirLockFile - - elif lock_class is None: - lock_class = LockFile - self.directory = directory self.forever = forever self.filemode = filemode @@ -100,17 +90,17 @@ def __init__( self.lock_class = lock_class @staticmethod - def encode(x): + def encode(x: str) -> str: return hashlib.sha224(x.encode()).hexdigest() - def _fn(self, name): + def _fn(self, name: str) -> str: # NOTE: This method should not change as some may depend on it. # See: https://github.com/ionrock/cachecontrol/issues/63 hashed = self.encode(name) parts = list(hashed[:5]) + [hashed] return os.path.join(self.directory, *parts) - def get(self, key): + def get(self, key: str) -> Optional[bytes]: name = self._fn(key) try: with open(name, "rb") as fh: @@ -119,11 +109,13 @@ def get(self, key): except FileNotFoundError: return None - def set(self, key, value, expires=None): + def set( + self, key: str, value: bytes, expires: Optional[Union[int, "datetime"]] = None + ) -> None: name = self._fn(key) self._write(name, value) - def _write(self, path, data: bytes): + def _write(self, path: str, data: bytes) -> None: """ Safely write the data to the given path. """ @@ -133,12 +125,12 @@ def _write(self, path, data: bytes): except (IOError, OSError): pass - with self.lock_class(path) as lock: + with self.lock_class(path + ".lock"): # Write our actual file - with _secure_open_write(lock.path, self.filemode) as fh: + with _secure_open_write(path, self.filemode) as fh: fh.write(data) - def _delete(self, key, suffix): + def _delete(self, key: str, suffix: str) -> None: name = self._fn(key) + suffix if not self.forever: try: @@ -153,7 +145,7 @@ class FileCache(_FileCacheMixin, BaseCache): downloads. """ - def delete(self, key): + def delete(self, key: str) -> None: self._delete(key, "") @@ -163,23 +155,23 @@ class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache): peak memory usage. """ - def get_body(self, key): + def get_body(self, key: str) -> Optional["IO[bytes]"]: name = self._fn(key) + ".body" try: return open(name, "rb") except FileNotFoundError: return None - def set_body(self, key, body): + def set_body(self, key: str, body: bytes) -> None: name = self._fn(key) + ".body" self._write(name, body) - def delete(self, key): + def delete(self, key: str) -> None: self._delete(key, "") self._delete(key, ".body") -def url_to_file_path(url, filecache): +def url_to_file_path(url: str, filecache: FileCache) -> str: """Return the file cache path based on the URL. This does not ensure the file exists! diff --git a/src/pip/_vendor/cachecontrol/caches/redis_cache.py b/src/pip/_vendor/cachecontrol/caches/redis_cache.py index 2cba4b07080..f7ae45d3828 100644 --- a/src/pip/_vendor/cachecontrol/caches/redis_cache.py +++ b/src/pip/_vendor/cachecontrol/caches/redis_cache.py @@ -4,36 +4,45 @@ from __future__ import division -from datetime import datetime +from datetime import datetime, timezone +from typing import TYPE_CHECKING, Optional, Union + from pip._vendor.cachecontrol.cache import BaseCache +if TYPE_CHECKING: + from redis import Redis -class RedisCache(BaseCache): - def __init__(self, conn): +class RedisCache(BaseCache): + def __init__(self, conn: "Redis[bytes]") -> None: self.conn = conn - def get(self, key): + def get(self, key: str) -> Optional[bytes]: return self.conn.get(key) - def set(self, key, value, expires=None): + def set( + self, key: str, value: bytes, expires: Optional[Union[int, datetime]] = None + ) -> None: if not expires: self.conn.set(key, value) elif isinstance(expires, datetime): - expires = expires - datetime.utcnow() - self.conn.setex(key, int(expires.total_seconds()), value) + now_utc = datetime.now(timezone.utc) + if expires.tzinfo is None: + now_utc = now_utc.replace(tzinfo=None) + delta = expires - now_utc + self.conn.setex(key, int(delta.total_seconds()), value) else: self.conn.setex(key, expires, value) - def delete(self, key): + def delete(self, key: str) -> None: self.conn.delete(key) - def clear(self): + def clear(self) -> None: """Helper for clearing all the keys in a database. Use with caution!""" for key in self.conn.keys(): self.conn.delete(key) - def close(self): + def close(self) -> None: """Redis uses connection pooling, no need to close the connection.""" pass diff --git a/src/pip/_vendor/cachecontrol/compat.py b/src/pip/_vendor/cachecontrol/compat.py deleted file mode 100644 index ccec9379dba..00000000000 --- a/src/pip/_vendor/cachecontrol/compat.py +++ /dev/null @@ -1,32 +0,0 @@ -# SPDX-FileCopyrightText: 2015 Eric Larson -# -# SPDX-License-Identifier: Apache-2.0 - -try: - from urllib.parse import urljoin -except ImportError: - from urlparse import urljoin - - -try: - import cPickle as pickle -except ImportError: - import pickle - -# Handle the case where the requests module has been patched to not have -# urllib3 bundled as part of its source. -try: - from pip._vendor.requests.packages.urllib3.response import HTTPResponse -except ImportError: - from pip._vendor.urllib3.response import HTTPResponse - -try: - from pip._vendor.requests.packages.urllib3.util import is_fp_closed -except ImportError: - from pip._vendor.urllib3.util import is_fp_closed - -# Replicate some six behaviour -try: - text_type = unicode -except NameError: - text_type = str diff --git a/src/pip/_vendor/cachecontrol/controller.py b/src/pip/_vendor/cachecontrol/controller.py index 7af0e002da0..3365d962130 100644 --- a/src/pip/_vendor/cachecontrol/controller.py +++ b/src/pip/_vendor/cachecontrol/controller.py @@ -5,17 +5,25 @@ """ The httplib2 algorithms ported for use with requests. """ +import calendar import logging import re -import calendar import time from email.utils import parsedate_tz +from typing import TYPE_CHECKING, Collection, Dict, Mapping, Optional, Tuple, Union from pip._vendor.requests.structures import CaseInsensitiveDict -from .cache import DictCache, SeparateBodyBaseCache -from .serialize import Serializer +from pip._vendor.cachecontrol.cache import DictCache, SeparateBodyBaseCache +from pip._vendor.cachecontrol.serialize import Serializer + +if TYPE_CHECKING: + from typing import Literal + from pip._vendor.requests import PreparedRequest + from pip._vendor.urllib3 import HTTPResponse + + from pip._vendor.cachecontrol.cache import BaseCache logger = logging.getLogger(__name__) @@ -24,12 +32,14 @@ PERMANENT_REDIRECT_STATUSES = (301, 308) -def parse_uri(uri): +def parse_uri(uri: str) -> Tuple[str, str, str, str, str]: """Parses a URI using the regex given in Appendix B of RFC 3986. (scheme, authority, path, query, fragment) = parse_uri(uri) """ - groups = URI.match(uri).groups() + match = URI.match(uri) + assert match is not None + groups = match.groups() return (groups[1], groups[3], groups[4], groups[6], groups[8]) @@ -37,7 +47,11 @@ class CacheController(object): """An interface to see if request should cached or not.""" def __init__( - self, cache=None, cache_etags=True, serializer=None, status_codes=None + self, + cache: Optional["BaseCache"] = None, + cache_etags: bool = True, + serializer: Optional[Serializer] = None, + status_codes: Optional[Collection[int]] = None, ): self.cache = DictCache() if cache is None else cache self.cache_etags = cache_etags @@ -45,7 +59,7 @@ def __init__( self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308) @classmethod - def _urlnorm(cls, uri): + def _urlnorm(cls, uri: str) -> str: """Normalize the URL to create a safe key for the cache""" (scheme, authority, path, query, fragment) = parse_uri(uri) if not scheme or not authority: @@ -65,10 +79,12 @@ def _urlnorm(cls, uri): return defrag_uri @classmethod - def cache_url(cls, uri): + def cache_url(cls, uri: str) -> str: return cls._urlnorm(uri) - def parse_cache_control(self, headers): + def parse_cache_control( + self, headers: Mapping[str, str] + ) -> Dict[str, Optional[int]]: known_directives = { # https://tools.ietf.org/html/rfc7234#section-5.2 "max-age": (int, True), @@ -87,7 +103,7 @@ def parse_cache_control(self, headers): cc_headers = headers.get("cache-control", headers.get("Cache-Control", "")) - retval = {} + retval: Dict[str, Optional[int]] = {} for cc_directive in cc_headers.split(","): if not cc_directive.strip(): @@ -122,11 +138,12 @@ def parse_cache_control(self, headers): return retval - def _load_from_cache(self, request): + def _load_from_cache(self, request: "PreparedRequest") -> Optional["HTTPResponse"]: """ Load a cached response, or return None if it's not available. """ cache_url = request.url + assert cache_url is not None cache_data = self.cache.get(cache_url) if cache_data is None: logger.debug("No cache entry available") @@ -142,11 +159,14 @@ def _load_from_cache(self, request): logger.warning("Cache entry deserialization failed, entry ignored") return result - def cached_request(self, request): + def cached_request( + self, request: "PreparedRequest" + ) -> Union["HTTPResponse", "Literal[False]"]: """ Return a cached response if it exists in the cache, otherwise return False. """ + assert request.url is not None cache_url = self.cache_url(request.url) logger.debug('Looking up "%s" in the cache', cache_url) cc = self.parse_cache_control(request.headers) @@ -182,7 +202,7 @@ def cached_request(self, request): logger.debug(msg) return resp - headers = CaseInsensitiveDict(resp.headers) + headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers) if not headers or "date" not in headers: if "etag" not in headers: # Without date or etag, the cached response can never be used @@ -193,7 +213,9 @@ def cached_request(self, request): return False now = time.time() - date = calendar.timegm(parsedate_tz(headers["date"])) + time_tuple = parsedate_tz(headers["date"]) + assert time_tuple is not None + date = calendar.timegm(time_tuple[:6]) current_age = max(0, now - date) logger.debug("Current age based on date: %i", current_age) @@ -207,28 +229,30 @@ def cached_request(self, request): freshness_lifetime = 0 # Check the max-age pragma in the cache control header - if "max-age" in resp_cc: - freshness_lifetime = resp_cc["max-age"] + max_age = resp_cc.get("max-age") + if max_age is not None: + freshness_lifetime = max_age logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime) # If there isn't a max-age, check for an expires header elif "expires" in headers: expires = parsedate_tz(headers["expires"]) if expires is not None: - expire_time = calendar.timegm(expires) - date + expire_time = calendar.timegm(expires[:6]) - date freshness_lifetime = max(0, expire_time) logger.debug("Freshness lifetime from expires: %i", freshness_lifetime) # Determine if we are setting freshness limit in the # request. Note, this overrides what was in the response. - if "max-age" in cc: - freshness_lifetime = cc["max-age"] + max_age = cc.get("max-age") + if max_age is not None: + freshness_lifetime = max_age logger.debug( "Freshness lifetime from request max-age: %i", freshness_lifetime ) - if "min-fresh" in cc: - min_fresh = cc["min-fresh"] + min_fresh = cc.get("min-fresh") + if min_fresh is not None: # adjust our current age by our min fresh current_age += min_fresh logger.debug("Adjusted current age from min-fresh: %i", current_age) @@ -247,12 +271,12 @@ def cached_request(self, request): # return the original handler return False - def conditional_headers(self, request): + def conditional_headers(self, request: "PreparedRequest") -> Dict[str, str]: resp = self._load_from_cache(request) new_headers = {} if resp: - headers = CaseInsensitiveDict(resp.headers) + headers: CaseInsensitiveDict[str] = CaseInsensitiveDict(resp.headers) if "etag" in headers: new_headers["If-None-Match"] = headers["ETag"] @@ -262,7 +286,14 @@ def conditional_headers(self, request): return new_headers - def _cache_set(self, cache_url, request, response, body=None, expires_time=None): + def _cache_set( + self, + cache_url: str, + request: "PreparedRequest", + response: "HTTPResponse", + body: Optional[bytes] = None, + expires_time: Optional[int] = None, + ) -> None: """ Store the data in the cache. """ @@ -285,7 +316,13 @@ def _cache_set(self, cache_url, request, response, body=None, expires_time=None) expires=expires_time, ) - def cache_response(self, request, response, body=None, status_codes=None): + def cache_response( + self, + request: "PreparedRequest", + response: "HTTPResponse", + body: Optional[bytes] = None, + status_codes: Optional[Collection[int]] = None, + ) -> None: """ Algorithm for caching requests. @@ -300,10 +337,14 @@ def cache_response(self, request, response, body=None, status_codes=None): ) return - response_headers = CaseInsensitiveDict(response.headers) + response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict( + response.headers + ) if "date" in response_headers: - date = calendar.timegm(parsedate_tz(response_headers["date"])) + time_tuple = parsedate_tz(response_headers["date"]) + assert time_tuple is not None + date = calendar.timegm(time_tuple[:6]) else: date = 0 @@ -322,6 +363,7 @@ def cache_response(self, request, response, body=None, status_codes=None): cc_req = self.parse_cache_control(request.headers) cc = self.parse_cache_control(response_headers) + assert request.url is not None cache_url = self.cache_url(request.url) logger.debug('Updating cache with response from "%s"', cache_url) @@ -354,7 +396,7 @@ def cache_response(self, request, response, body=None, status_codes=None): if response_headers.get("expires"): expires = parsedate_tz(response_headers["expires"]) if expires is not None: - expires_time = calendar.timegm(expires) - date + expires_time = calendar.timegm(expires[:6]) - date expires_time = max(expires_time, 14 * 86400) @@ -372,11 +414,14 @@ def cache_response(self, request, response, body=None, status_codes=None): # is no date header then we can't do anything about expiring # the cache. elif "date" in response_headers: - date = calendar.timegm(parsedate_tz(response_headers["date"])) + time_tuple = parsedate_tz(response_headers["date"]) + assert time_tuple is not None + date = calendar.timegm(time_tuple[:6]) # cache when there is a max-age > 0 - if "max-age" in cc and cc["max-age"] > 0: + max_age = cc.get("max-age") + if max_age is not None and max_age > 0: logger.debug("Caching b/c date exists and max-age > 0") - expires_time = cc["max-age"] + expires_time = max_age self._cache_set( cache_url, request, @@ -391,7 +436,7 @@ def cache_response(self, request, response, body=None, status_codes=None): if response_headers["expires"]: expires = parsedate_tz(response_headers["expires"]) if expires is not None: - expires_time = calendar.timegm(expires) - date + expires_time = calendar.timegm(expires[:6]) - date else: expires_time = None @@ -408,13 +453,16 @@ def cache_response(self, request, response, body=None, status_codes=None): expires_time, ) - def update_cached_response(self, request, response): + def update_cached_response( + self, request: "PreparedRequest", response: "HTTPResponse" + ) -> "HTTPResponse": """On a 304 we will get a new set of headers that we want to update our cached value with, assuming we have one. This should only ever be called when we've sent an ETag and gotten a 304 as the response. """ + assert request.url is not None cache_url = self.cache_url(request.url) cached_response = self._load_from_cache(request) @@ -434,7 +482,7 @@ def update_cached_response(self, request, response): cached_response.headers.update( dict( (k, v) - for k, v in response.headers.items() + for k, v in response.headers.items() # type: ignore[no-untyped-call] if k.lower() not in excluded_headers ) ) diff --git a/src/pip/_vendor/cachecontrol/filewrapper.py b/src/pip/_vendor/cachecontrol/filewrapper.py index f5ed5f6f6ec..472ba600161 100644 --- a/src/pip/_vendor/cachecontrol/filewrapper.py +++ b/src/pip/_vendor/cachecontrol/filewrapper.py @@ -2,8 +2,12 @@ # # SPDX-License-Identifier: Apache-2.0 -from tempfile import NamedTemporaryFile import mmap +from tempfile import NamedTemporaryFile +from typing import TYPE_CHECKING, Any, Callable, Optional + +if TYPE_CHECKING: + from http.client import HTTPResponse class CallbackFileWrapper(object): @@ -25,12 +29,14 @@ class CallbackFileWrapper(object): performance impact. """ - def __init__(self, fp, callback): + def __init__( + self, fp: "HTTPResponse", callback: Optional[Callable[[bytes], None]] + ) -> None: self.__buf = NamedTemporaryFile("rb+", delete=True) self.__fp = fp self.__callback = callback - def __getattr__(self, name): + def __getattr__(self, name: str) -> Any: # The vaguaries of garbage collection means that self.__fp is # not always set. By using __getattribute__ and the private # name[0] allows looking up the attribute value and raising an @@ -42,7 +48,7 @@ def __getattr__(self, name): fp = self.__getattribute__("_CallbackFileWrapper__fp") return getattr(fp, name) - def __is_fp_closed(self): + def __is_fp_closed(self) -> bool: try: return self.__fp.fp is None @@ -50,7 +56,8 @@ def __is_fp_closed(self): pass try: - return self.__fp.closed + closed: bool = self.__fp.closed + return closed except AttributeError: pass @@ -59,7 +66,7 @@ def __is_fp_closed(self): # TODO: Add some logging here... return False - def _close(self): + def _close(self) -> None: if self.__callback: if self.__buf.tell() == 0: # Empty file: @@ -86,8 +93,8 @@ def _close(self): # Important when caching big files. self.__buf.close() - def read(self, amt=None): - data = self.__fp.read(amt) + def read(self, amt: Optional[int] = None) -> bytes: + data: bytes = self.__fp.read(amt) if data: # We may be dealing with b'', a sign that things are over: # it's passed e.g. after we've already closed self.__buf. @@ -97,8 +104,8 @@ def read(self, amt=None): return data - def _safe_read(self, amt): - data = self.__fp._safe_read(amt) + def _safe_read(self, amt: int) -> bytes: + data: bytes = self.__fp._safe_read(amt) # type: ignore[attr-defined] if amt == 2 and data == b"\r\n": # urllib executes this read to toss the CRLF at the end # of the chunk. diff --git a/src/pip/_vendor/cachecontrol/heuristics.py b/src/pip/_vendor/cachecontrol/heuristics.py index ebe4a96f589..1e88ada68f2 100644 --- a/src/pip/_vendor/cachecontrol/heuristics.py +++ b/src/pip/_vendor/cachecontrol/heuristics.py @@ -4,26 +4,27 @@ import calendar import time - +from datetime import datetime, timedelta, timezone from email.utils import formatdate, parsedate, parsedate_tz +from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional -from datetime import datetime, timedelta +if TYPE_CHECKING: + from pip._vendor.urllib3 import HTTPResponse TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT" -def expire_after(delta, date=None): - date = date or datetime.utcnow() +def expire_after(delta: timedelta, date: Optional[datetime] = None) -> datetime: + date = date or datetime.now(timezone.utc) return date + delta -def datetime_to_header(dt): +def datetime_to_header(dt: datetime) -> str: return formatdate(calendar.timegm(dt.timetuple())) class BaseHeuristic(object): - - def warning(self, response): + def warning(self, response: "HTTPResponse") -> Optional[str]: """ Return a valid 1xx warning header value describing the cache adjustments. @@ -34,7 +35,7 @@ def warning(self, response): """ return '110 - "Response is Stale"' - def update_headers(self, response): + def update_headers(self, response: "HTTPResponse") -> Dict[str, str]: """Update the response headers with any new headers. NOTE: This SHOULD always include some Warning header to @@ -43,7 +44,7 @@ def update_headers(self, response): """ return {} - def apply(self, response): + def apply(self, response: "HTTPResponse") -> "HTTPResponse": updated_headers = self.update_headers(response) if updated_headers: @@ -61,12 +62,12 @@ class OneDayCache(BaseHeuristic): future. """ - def update_headers(self, response): + def update_headers(self, response: "HTTPResponse") -> Dict[str, str]: headers = {} if "expires" not in response.headers: date = parsedate(response.headers["date"]) - expires = expire_after(timedelta(days=1), date=datetime(*date[:6])) + expires = expire_after(timedelta(days=1), date=datetime(*date[:6], tzinfo=timezone.utc)) # type: ignore[misc] headers["expires"] = datetime_to_header(expires) headers["cache-control"] = "public" return headers @@ -77,14 +78,14 @@ class ExpiresAfter(BaseHeuristic): Cache **all** requests for a defined time period. """ - def __init__(self, **kw): + def __init__(self, **kw: Any) -> None: self.delta = timedelta(**kw) - def update_headers(self, response): + def update_headers(self, response: "HTTPResponse") -> Dict[str, str]: expires = expire_after(self.delta) return {"expires": datetime_to_header(expires), "cache-control": "public"} - def warning(self, response): + def warning(self, response: "HTTPResponse") -> Optional[str]: tmpl = "110 - Automatically cached for %s. Response might be stale" return tmpl % self.delta @@ -101,12 +102,23 @@ class LastModified(BaseHeuristic): http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397 Unlike mozilla we limit this to 24-hr. """ + cacheable_by_default_statuses = { - 200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501 + 200, + 203, + 204, + 206, + 300, + 301, + 404, + 405, + 410, + 414, + 501, } - def update_headers(self, resp): - headers = resp.headers + def update_headers(self, resp: "HTTPResponse") -> Dict[str, str]: + headers: Mapping[str, str] = resp.headers if "expires" in headers: return {} @@ -120,9 +132,11 @@ def update_headers(self, resp): if "date" not in headers or "last-modified" not in headers: return {} - date = calendar.timegm(parsedate_tz(headers["date"])) + time_tuple = parsedate_tz(headers["date"]) + assert time_tuple is not None + date = calendar.timegm(time_tuple[:6]) last_modified = parsedate(headers["last-modified"]) - if date is None or last_modified is None: + if last_modified is None: return {} now = time.time() @@ -135,5 +149,5 @@ def update_headers(self, resp): expires = date + freshness_lifetime return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))} - def warning(self, resp): + def warning(self, resp: "HTTPResponse") -> Optional[str]: return None diff --git a/src/pip/_vendor/cachecontrol/py.typed b/src/pip/_vendor/cachecontrol/py.typed new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/pip/_vendor/cachecontrol/serialize.py b/src/pip/_vendor/cachecontrol/serialize.py index 7fe1a3e33a3..f21eaea6f39 100644 --- a/src/pip/_vendor/cachecontrol/serialize.py +++ b/src/pip/_vendor/cachecontrol/serialize.py @@ -5,19 +5,23 @@ import base64 import io import json +import pickle import zlib +from typing import IO, TYPE_CHECKING, Any, Mapping, Optional from pip._vendor import msgpack from pip._vendor.requests.structures import CaseInsensitiveDict +from pip._vendor.urllib3 import HTTPResponse -from .compat import HTTPResponse, pickle, text_type +if TYPE_CHECKING: + from pip._vendor.requests import PreparedRequest, Request -def _b64_decode_bytes(b): +def _b64_decode_bytes(b: str) -> bytes: return base64.b64decode(b.encode("ascii")) -def _b64_decode_str(s): +def _b64_decode_str(s: str) -> str: return _b64_decode_bytes(s).decode("utf8") @@ -25,54 +29,57 @@ def _b64_decode_str(s): class Serializer(object): - def dumps(self, request, response, body=None): - response_headers = CaseInsensitiveDict(response.headers) + def dumps( + self, + request: "PreparedRequest", + response: HTTPResponse, + body: Optional[bytes] = None, + ) -> bytes: + response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict( + response.headers + ) if body is None: # When a body isn't passed in, we'll read the response. We # also update the response with a new file handler to be # sure it acts as though it was never read. body = response.read(decode_content=False) - response._fp = io.BytesIO(body) - - # NOTE: This is all a bit weird, but it's really important that on - # Python 2.x these objects are unicode and not str, even when - # they contain only ascii. The problem here is that msgpack - # understands the difference between unicode and bytes and we - # have it set to differentiate between them, however Python 2 - # doesn't know the difference. Forcing these to unicode will be - # enough to have msgpack know the difference. + response._fp = io.BytesIO(body) # type: ignore[attr-defined] + response.length_remaining = len(body) + data = { - u"response": { - u"body": body, # Empty bytestring if body is stored separately - u"headers": dict( - (text_type(k), text_type(v)) for k, v in response.headers.items() - ), - u"status": response.status, - u"version": response.version, - u"reason": text_type(response.reason), - u"strict": response.strict, - u"decode_content": response.decode_content, + "response": { + "body": body, # Empty bytestring if body is stored separately + "headers": dict((str(k), str(v)) for k, v in response.headers.items()), # type: ignore[no-untyped-call] + "status": response.status, + "version": response.version, + "reason": str(response.reason), + "decode_content": response.decode_content, } } # Construct our vary headers - data[u"vary"] = {} - if u"vary" in response_headers: - varied_headers = response_headers[u"vary"].split(",") + data["vary"] = {} + if "vary" in response_headers: + varied_headers = response_headers["vary"].split(",") for header in varied_headers: - header = text_type(header).strip() + header = str(header).strip() header_value = request.headers.get(header, None) if header_value is not None: - header_value = text_type(header_value) - data[u"vary"][header] = header_value + header_value = str(header_value) + data["vary"][header] = header_value return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)]) - def loads(self, request, data, body_file=None): + def loads( + self, + request: "PreparedRequest", + data: bytes, + body_file: Optional["IO[bytes]"] = None, + ) -> Optional[HTTPResponse]: # Short circuit if we've been given an empty set of data if not data: - return + return None # Determine what version of the serializer the data was serialized # with @@ -88,18 +95,23 @@ def loads(self, request, data, body_file=None): ver = b"cc=0" # Get the version number out of the cc=N - ver = ver.split(b"=", 1)[-1].decode("ascii") + verstr = ver.split(b"=", 1)[-1].decode("ascii") # Dispatch to the actual load method for the given version try: - return getattr(self, "_loads_v{}".format(ver))(request, data, body_file) + return getattr(self, "_loads_v{}".format(verstr))(request, data, body_file) # type: ignore[no-any-return] except AttributeError: # This is a version we don't have a loads function for, so we'll # just treat it as a miss and return None - return - - def prepare_response(self, request, cached, body_file=None): + return None + + def prepare_response( + self, + request: "Request", + cached: Mapping[str, Any], + body_file: Optional["IO[bytes]"] = None, + ) -> Optional[HTTPResponse]: """Verify our vary headers match and construct a real urllib3 HTTPResponse object. """ @@ -108,23 +120,26 @@ def prepare_response(self, request, cached, body_file=None): # This case is also handled in the controller code when creating # a cache entry, but is left here for backwards compatibility. if "*" in cached.get("vary", {}): - return + return None # Ensure that the Vary headers for the cached response match our # request for header, value in cached.get("vary", {}).items(): if request.headers.get(header, None) != value: - return + return None body_raw = cached["response"].pop("body") - headers = CaseInsensitiveDict(data=cached["response"]["headers"]) + headers: CaseInsensitiveDict[str] = CaseInsensitiveDict( + data=cached["response"]["headers"] + ) if headers.get("transfer-encoding", "") == "chunked": headers.pop("transfer-encoding") cached["response"]["headers"] = headers try: + body: "IO[bytes]" if body_file is None: body = io.BytesIO(body_raw) else: @@ -138,28 +153,46 @@ def prepare_response(self, request, cached, body_file=None): # TypeError: 'str' does not support the buffer interface body = io.BytesIO(body_raw.encode("utf8")) + # Discard any `strict` parameter serialized by older version of cachecontrol. + cached["response"].pop("strict", None) + return HTTPResponse(body=body, preload_content=False, **cached["response"]) - def _loads_v0(self, request, data, body_file=None): + def _loads_v0( + self, + request: "Request", + data: bytes, + body_file: Optional["IO[bytes]"] = None, + ) -> None: # The original legacy cache data. This doesn't contain enough # information to construct everything we need, so we'll treat this as # a miss. return - def _loads_v1(self, request, data, body_file=None): + def _loads_v1( + self, + request: "Request", + data: bytes, + body_file: Optional["IO[bytes]"] = None, + ) -> Optional[HTTPResponse]: try: cached = pickle.loads(data) except ValueError: - return + return None return self.prepare_response(request, cached, body_file) - def _loads_v2(self, request, data, body_file=None): + def _loads_v2( + self, + request: "Request", + data: bytes, + body_file: Optional["IO[bytes]"] = None, + ) -> Optional[HTTPResponse]: assert body_file is None try: cached = json.loads(zlib.decompress(data).decode("utf8")) except (ValueError, zlib.error): - return + return None # We need to decode the items that we've base64 encoded cached["response"]["body"] = _b64_decode_bytes(cached["response"]["body"]) @@ -175,16 +208,26 @@ def _loads_v2(self, request, data, body_file=None): return self.prepare_response(request, cached, body_file) - def _loads_v3(self, request, data, body_file): + def _loads_v3( + self, + request: "Request", + data: bytes, + body_file: Optional["IO[bytes]"] = None, + ) -> None: # Due to Python 2 encoding issues, it's impossible to know for sure # exactly how to load v3 entries, thus we'll treat these as a miss so # that they get rewritten out as v4 entries. return - def _loads_v4(self, request, data, body_file=None): + def _loads_v4( + self, + request: "Request", + data: bytes, + body_file: Optional["IO[bytes]"] = None, + ) -> Optional[HTTPResponse]: try: cached = msgpack.loads(data, raw=False) except ValueError: - return + return None return self.prepare_response(request, cached, body_file) diff --git a/src/pip/_vendor/cachecontrol/wrapper.py b/src/pip/_vendor/cachecontrol/wrapper.py index b6ee7f20398..293e69fe7d4 100644 --- a/src/pip/_vendor/cachecontrol/wrapper.py +++ b/src/pip/_vendor/cachecontrol/wrapper.py @@ -2,21 +2,30 @@ # # SPDX-License-Identifier: Apache-2.0 -from .adapter import CacheControlAdapter -from .cache import DictCache +from typing import TYPE_CHECKING, Collection, Optional, Type +from pip._vendor.cachecontrol.adapter import CacheControlAdapter +from pip._vendor.cachecontrol.cache import DictCache -def CacheControl( - sess, - cache=None, - cache_etags=True, - serializer=None, - heuristic=None, - controller_class=None, - adapter_class=None, - cacheable_methods=None, -): +if TYPE_CHECKING: + from pip._vendor import requests + + from pip._vendor.cachecontrol.cache import BaseCache + from pip._vendor.cachecontrol.controller import CacheController + from pip._vendor.cachecontrol.heuristics import BaseHeuristic + from pip._vendor.cachecontrol.serialize import Serializer + +def CacheControl( + sess: "requests.Session", + cache: Optional["BaseCache"] = None, + cache_etags: bool = True, + serializer: Optional["Serializer"] = None, + heuristic: Optional["BaseHeuristic"] = None, + controller_class: Optional[Type["CacheController"]] = None, + adapter_class: Optional[Type[CacheControlAdapter]] = None, + cacheable_methods: Optional[Collection[str]] = None, +) -> "requests.Session": cache = DictCache() if cache is None else cache adapter_class = adapter_class or CacheControlAdapter adapter = adapter_class( diff --git a/src/pip/_vendor/vendor.txt b/src/pip/_vendor/vendor.txt index dcf89dc04c5..d0f4c71cccc 100644 --- a/src/pip/_vendor/vendor.txt +++ b/src/pip/_vendor/vendor.txt @@ -1,4 +1,4 @@ -CacheControl==0.12.11 # Make sure to update the license in pyproject.toml for this. +CacheControl==0.13.0 # Make sure to update the license in pyproject.toml for this. colorama==0.4.6 distlib==0.3.6 distro==1.8.0 From 9fb93c478ef7d5e1423cc66467bb63c686864828 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 2 Jun 2023 14:00:15 -0400 Subject: [PATCH 11/17] mypy fix. --- src/pip/_internal/network/cache.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/network/cache.py b/src/pip/_internal/network/cache.py index d6b8ccdcf36..a4d13620532 100644 --- a/src/pip/_internal/network/cache.py +++ b/src/pip/_internal/network/cache.py @@ -3,7 +3,8 @@ import os from contextlib import contextmanager -from typing import BinaryIO, Generator, Optional +from datetime import datetime +from typing import BinaryIO, Generator, Optional, Union from pip._vendor.cachecontrol.cache import SeparateBodyBaseCache from pip._vendor.cachecontrol.caches import SeparateBodyFileCache @@ -62,7 +63,9 @@ def _write(self, path: str, data: bytes) -> None: replace(f.name, path) - def set(self, key: str, value: bytes, expires: Optional[int] = None) -> None: + def set( + self, key: str, value: bytes, expires: Union[int, datetime, None] = None + ) -> None: path = self._get_cache_path(key) self._write(path, value) From 28590a0a0809b3bb8999b4d08aa93bd9ffb3458d Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 12 Jun 2023 09:29:35 -0400 Subject: [PATCH 12/17] Improve documentation of caching and the cache subcommand. --- docs/html/topics/caching.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/html/topics/caching.md b/docs/html/topics/caching.md index 954cebe402d..19bd064a74c 100644 --- a/docs/html/topics/caching.md +++ b/docs/html/topics/caching.md @@ -27,6 +27,12 @@ While this cache attempts to minimize network activity, it does not prevent network access altogether. If you want a local install solution that circumvents accessing PyPI, see {ref}`Installing from local packages`. +In versions prior to 23.2, this cache was stored in a directory called `http` in +the main cache directory (see below for its location). In 23.2 and later, a new +cache format is used, stored in a directory called `http-v2`. If you have +completely switched to newer versions of `pip`, you may wish to delete the old +directory. + (wheel-caching)= ### Locally built wheels @@ -124,11 +130,11 @@ The {ref}`pip cache` command can be used to manage pip's cache. ### Removing a single package -`pip cache remove setuptools` removes all wheel files related to setuptools from pip's cache. +`pip cache remove setuptools` removes all wheel files related to setuptools from pip's cache. HTTP cache files are not removed at this time. ### Removing the cache -`pip cache purge` will clear all wheel files from pip's cache. +`pip cache purge` will clear all files from pip's wheel and HTTP caches. ### Listing cached files From dcd2d5e344f27149789f05edb9da45994eac2473 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 12 Jun 2023 09:30:30 -0400 Subject: [PATCH 13/17] Update CacheControl to 0.13.1. --- src/pip/_vendor/cachecontrol/__init__.py | 2 +- src/pip/_vendor/cachecontrol/_cmd.py | 5 +- src/pip/_vendor/cachecontrol/adapter.py | 51 ++++---- src/pip/_vendor/cachecontrol/cache.py | 18 +-- .../_vendor/cachecontrol/caches/file_cache.py | 17 +-- .../cachecontrol/caches/redis_cache.py | 10 +- src/pip/_vendor/cachecontrol/controller.py | 58 +++++---- src/pip/_vendor/cachecontrol/filewrapper.py | 9 +- src/pip/_vendor/cachecontrol/heuristics.py | 23 ++-- src/pip/_vendor/cachecontrol/serialize.py | 111 +++++++----------- src/pip/_vendor/cachecontrol/wrapper.py | 19 +-- src/pip/_vendor/vendor.txt | 2 +- 12 files changed, 149 insertions(+), 176 deletions(-) diff --git a/src/pip/_vendor/cachecontrol/__init__.py b/src/pip/_vendor/cachecontrol/__init__.py index 3701cdd6be8..4d20bc9b12a 100644 --- a/src/pip/_vendor/cachecontrol/__init__.py +++ b/src/pip/_vendor/cachecontrol/__init__.py @@ -8,7 +8,7 @@ """ __author__ = "Eric Larson" __email__ = "eric@ionrock.org" -__version__ = "0.13.0" +__version__ = "0.13.1" from pip._vendor.cachecontrol.adapter import CacheControlAdapter from pip._vendor.cachecontrol.controller import CacheController diff --git a/src/pip/_vendor/cachecontrol/_cmd.py b/src/pip/_vendor/cachecontrol/_cmd.py index ab4dac3dde1..2c84208a5d8 100644 --- a/src/pip/_vendor/cachecontrol/_cmd.py +++ b/src/pip/_vendor/cachecontrol/_cmd.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations import logging from argparse import ArgumentParser @@ -36,7 +37,7 @@ def get_session() -> requests.Session: return sess -def get_args() -> "Namespace": +def get_args() -> Namespace: parser = ArgumentParser() parser.add_argument("url", help="The URL to try and cache") return parser.parse_args() @@ -53,7 +54,7 @@ def main() -> None: setup_logging() # try setting the cache - cache_controller: "CacheController" = ( + cache_controller: CacheController = ( sess.cache_controller # type: ignore[attr-defined] ) cache_controller.cache_response(resp.request, resp.raw) diff --git a/src/pip/_vendor/cachecontrol/adapter.py b/src/pip/_vendor/cachecontrol/adapter.py index 83c08e003fe..3e83e308dba 100644 --- a/src/pip/_vendor/cachecontrol/adapter.py +++ b/src/pip/_vendor/cachecontrol/adapter.py @@ -1,11 +1,12 @@ # SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations import functools import types import zlib -from typing import TYPE_CHECKING, Any, Collection, Mapping, Optional, Tuple, Type, Union +from typing import TYPE_CHECKING, Any, Collection, Mapping from pip._vendor.requests.adapters import HTTPAdapter @@ -27,16 +28,16 @@ class CacheControlAdapter(HTTPAdapter): def __init__( self, - cache: Optional["BaseCache"] = None, + cache: BaseCache | None = None, cache_etags: bool = True, - controller_class: Optional[Type[CacheController]] = None, - serializer: Optional["Serializer"] = None, - heuristic: Optional["BaseHeuristic"] = None, - cacheable_methods: Optional[Collection[str]] = None, + controller_class: type[CacheController] | None = None, + serializer: Serializer | None = None, + heuristic: BaseHeuristic | None = None, + cacheable_methods: Collection[str] | None = None, *args: Any, **kw: Any, ) -> None: - super(CacheControlAdapter, self).__init__(*args, **kw) + super().__init__(*args, **kw) self.cache = DictCache() if cache is None else cache self.heuristic = heuristic self.cacheable_methods = cacheable_methods or ("GET",) @@ -48,16 +49,14 @@ def __init__( def send( self, - request: "PreparedRequest", + request: PreparedRequest, stream: bool = False, - timeout: Union[None, float, Tuple[float, float], Tuple[float, None]] = None, - verify: Union[bool, str] = True, - cert: Union[ - None, bytes, str, Tuple[Union[bytes, str], Union[bytes, str]] - ] = None, - proxies: Optional[Mapping[str, str]] = None, - cacheable_methods: Optional[Collection[str]] = None, - ) -> "Response": + timeout: None | float | tuple[float, float] | tuple[float, None] = None, + verify: bool | str = True, + cert: (None | bytes | str | tuple[bytes | str, bytes | str]) = None, + proxies: Mapping[str, str] | None = None, + cacheable_methods: Collection[str] | None = None, + ) -> Response: """ Send a request. Use the request information to see if it exists in the cache and cache the response if we need to and can. @@ -74,19 +73,17 @@ def send( # check for etags and add headers if appropriate request.headers.update(self.controller.conditional_headers(request)) - resp = super(CacheControlAdapter, self).send( - request, stream, timeout, verify, cert, proxies - ) + resp = super().send(request, stream, timeout, verify, cert, proxies) return resp def build_response( self, - request: "PreparedRequest", - response: "HTTPResponse", + request: PreparedRequest, + response: HTTPResponse, from_cache: bool = False, - cacheable_methods: Optional[Collection[str]] = None, - ) -> "Response": + cacheable_methods: Collection[str] | None = None, + ) -> Response: """ Build a response by making a request or using the cache. @@ -137,7 +134,7 @@ def build_response( if response.chunked: super_update_chunk_length = response._update_chunk_length # type: ignore[attr-defined] - def _update_chunk_length(self: "HTTPResponse") -> None: + def _update_chunk_length(self: HTTPResponse) -> None: super_update_chunk_length() if self.chunk_left == 0: self._fp._close() # type: ignore[attr-defined] @@ -146,9 +143,7 @@ def _update_chunk_length(self: "HTTPResponse") -> None: _update_chunk_length, response ) - resp: "Response" = super( # type: ignore[no-untyped-call] - CacheControlAdapter, self - ).build_response(request, response) + resp: Response = super().build_response(request, response) # type: ignore[no-untyped-call] # See if we should invalidate the cache. if request.method in self.invalidating_methods and resp.ok: @@ -163,4 +158,4 @@ def _update_chunk_length(self: "HTTPResponse") -> None: def close(self) -> None: self.cache.close() - super(CacheControlAdapter, self).close() # type: ignore[no-untyped-call] + super().close() # type: ignore[no-untyped-call] diff --git a/src/pip/_vendor/cachecontrol/cache.py b/src/pip/_vendor/cachecontrol/cache.py index 61031d23441..3293b0057c7 100644 --- a/src/pip/_vendor/cachecontrol/cache.py +++ b/src/pip/_vendor/cachecontrol/cache.py @@ -6,19 +6,21 @@ The cache object API for implementing caches. The default is a thread safe in-memory dictionary. """ +from __future__ import annotations + from threading import Lock -from typing import IO, TYPE_CHECKING, MutableMapping, Optional, Union +from typing import IO, TYPE_CHECKING, MutableMapping if TYPE_CHECKING: from datetime import datetime -class BaseCache(object): - def get(self, key: str) -> Optional[bytes]: +class BaseCache: + def get(self, key: str) -> bytes | None: raise NotImplementedError() def set( - self, key: str, value: bytes, expires: Optional[Union[int, "datetime"]] = None + self, key: str, value: bytes, expires: int | datetime | None = None ) -> None: raise NotImplementedError() @@ -30,15 +32,15 @@ def close(self) -> None: class DictCache(BaseCache): - def __init__(self, init_dict: Optional[MutableMapping[str, bytes]] = None) -> None: + def __init__(self, init_dict: MutableMapping[str, bytes] | None = None) -> None: self.lock = Lock() self.data = init_dict or {} - def get(self, key: str) -> Optional[bytes]: + def get(self, key: str) -> bytes | None: return self.data.get(key, None) def set( - self, key: str, value: bytes, expires: Optional[Union[int, "datetime"]] = None + self, key: str, value: bytes, expires: int | datetime | None = None ) -> None: with self.lock: self.data.update({key: value}) @@ -65,7 +67,7 @@ class SeparateBodyBaseCache(BaseCache): def set_body(self, key: str, body: bytes) -> None: raise NotImplementedError() - def get_body(self, key: str) -> Optional["IO[bytes]"]: + def get_body(self, key: str) -> IO[bytes] | None: """ Return the body as file-like object. """ diff --git a/src/pip/_vendor/cachecontrol/caches/file_cache.py b/src/pip/_vendor/cachecontrol/caches/file_cache.py index 0437c4e8a13..1fd28013084 100644 --- a/src/pip/_vendor/cachecontrol/caches/file_cache.py +++ b/src/pip/_vendor/cachecontrol/caches/file_cache.py @@ -1,11 +1,12 @@ # SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations import hashlib import os from textwrap import dedent -from typing import IO, TYPE_CHECKING, Optional, Type, Union +from typing import IO, TYPE_CHECKING from pip._vendor.cachecontrol.cache import BaseCache, SeparateBodyBaseCache from pip._vendor.cachecontrol.controller import CacheController @@ -16,7 +17,7 @@ from filelock import BaseFileLock -def _secure_open_write(filename: str, fmode: int) -> "IO[bytes]": +def _secure_open_write(filename: str, fmode: int) -> IO[bytes]: # We only want to write to this file, so open it in write only mode flags = os.O_WRONLY @@ -39,7 +40,7 @@ def _secure_open_write(filename: str, fmode: int) -> "IO[bytes]": # there try: os.remove(filename) - except (IOError, OSError): + except OSError: # The file must not exist already, so we can just skip ahead to opening pass @@ -66,7 +67,7 @@ def __init__( forever: bool = False, filemode: int = 0o0600, dirmode: int = 0o0700, - lock_class: Optional[Type["BaseFileLock"]] = None, + lock_class: type[BaseFileLock] | None = None, ) -> None: try: if lock_class is None: @@ -100,7 +101,7 @@ def _fn(self, name: str) -> str: parts = list(hashed[:5]) + [hashed] return os.path.join(self.directory, *parts) - def get(self, key: str) -> Optional[bytes]: + def get(self, key: str) -> bytes | None: name = self._fn(key) try: with open(name, "rb") as fh: @@ -110,7 +111,7 @@ def get(self, key: str) -> Optional[bytes]: return None def set( - self, key: str, value: bytes, expires: Optional[Union[int, "datetime"]] = None + self, key: str, value: bytes, expires: int | datetime | None = None ) -> None: name = self._fn(key) self._write(name, value) @@ -122,7 +123,7 @@ def _write(self, path: str, data: bytes) -> None: # Make sure the directory exists try: os.makedirs(os.path.dirname(path), self.dirmode) - except (IOError, OSError): + except OSError: pass with self.lock_class(path + ".lock"): @@ -155,7 +156,7 @@ class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache): peak memory usage. """ - def get_body(self, key: str) -> Optional["IO[bytes]"]: + def get_body(self, key: str) -> IO[bytes] | None: name = self._fn(key) + ".body" try: return open(name, "rb") diff --git a/src/pip/_vendor/cachecontrol/caches/redis_cache.py b/src/pip/_vendor/cachecontrol/caches/redis_cache.py index f7ae45d3828..f4f68c47bf6 100644 --- a/src/pip/_vendor/cachecontrol/caches/redis_cache.py +++ b/src/pip/_vendor/cachecontrol/caches/redis_cache.py @@ -1,11 +1,11 @@ # SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations -from __future__ import division from datetime import datetime, timezone -from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING from pip._vendor.cachecontrol.cache import BaseCache @@ -14,14 +14,14 @@ class RedisCache(BaseCache): - def __init__(self, conn: "Redis[bytes]") -> None: + def __init__(self, conn: Redis[bytes]) -> None: self.conn = conn - def get(self, key: str) -> Optional[bytes]: + def get(self, key: str) -> bytes | None: return self.conn.get(key) def set( - self, key: str, value: bytes, expires: Optional[Union[int, datetime]] = None + self, key: str, value: bytes, expires: int | datetime | None = None ) -> None: if not expires: self.conn.set(key, value) diff --git a/src/pip/_vendor/cachecontrol/controller.py b/src/pip/_vendor/cachecontrol/controller.py index 3365d962130..586b9f97b80 100644 --- a/src/pip/_vendor/cachecontrol/controller.py +++ b/src/pip/_vendor/cachecontrol/controller.py @@ -5,12 +5,14 @@ """ The httplib2 algorithms ported for use with requests. """ +from __future__ import annotations + import calendar import logging import re import time from email.utils import parsedate_tz -from typing import TYPE_CHECKING, Collection, Dict, Mapping, Optional, Tuple, Union +from typing import TYPE_CHECKING, Collection, Mapping from pip._vendor.requests.structures import CaseInsensitiveDict @@ -32,7 +34,7 @@ PERMANENT_REDIRECT_STATUSES = (301, 308) -def parse_uri(uri: str) -> Tuple[str, str, str, str, str]: +def parse_uri(uri: str) -> tuple[str, str, str, str, str]: """Parses a URI using the regex given in Appendix B of RFC 3986. (scheme, authority, path, query, fragment) = parse_uri(uri) @@ -43,15 +45,15 @@ def parse_uri(uri: str) -> Tuple[str, str, str, str, str]: return (groups[1], groups[3], groups[4], groups[6], groups[8]) -class CacheController(object): +class CacheController: """An interface to see if request should cached or not.""" def __init__( self, - cache: Optional["BaseCache"] = None, + cache: BaseCache | None = None, cache_etags: bool = True, - serializer: Optional[Serializer] = None, - status_codes: Optional[Collection[int]] = None, + serializer: Serializer | None = None, + status_codes: Collection[int] | None = None, ): self.cache = DictCache() if cache is None else cache self.cache_etags = cache_etags @@ -82,9 +84,7 @@ def _urlnorm(cls, uri: str) -> str: def cache_url(cls, uri: str) -> str: return cls._urlnorm(uri) - def parse_cache_control( - self, headers: Mapping[str, str] - ) -> Dict[str, Optional[int]]: + def parse_cache_control(self, headers: Mapping[str, str]) -> dict[str, int | None]: known_directives = { # https://tools.ietf.org/html/rfc7234#section-5.2 "max-age": (int, True), @@ -103,7 +103,7 @@ def parse_cache_control( cc_headers = headers.get("cache-control", headers.get("Cache-Control", "")) - retval: Dict[str, Optional[int]] = {} + retval: dict[str, int | None] = {} for cc_directive in cc_headers.split(","): if not cc_directive.strip(): @@ -138,7 +138,7 @@ def parse_cache_control( return retval - def _load_from_cache(self, request: "PreparedRequest") -> Optional["HTTPResponse"]: + def _load_from_cache(self, request: PreparedRequest) -> HTTPResponse | None: """ Load a cached response, or return None if it's not available. """ @@ -159,9 +159,7 @@ def _load_from_cache(self, request: "PreparedRequest") -> Optional["HTTPResponse logger.warning("Cache entry deserialization failed, entry ignored") return result - def cached_request( - self, request: "PreparedRequest" - ) -> Union["HTTPResponse", "Literal[False]"]: + def cached_request(self, request: PreparedRequest) -> HTTPResponse | Literal[False]: """ Return a cached response if it exists in the cache, otherwise return False. @@ -271,7 +269,7 @@ def cached_request( # return the original handler return False - def conditional_headers(self, request: "PreparedRequest") -> Dict[str, str]: + def conditional_headers(self, request: PreparedRequest) -> dict[str, str]: resp = self._load_from_cache(request) new_headers = {} @@ -289,10 +287,10 @@ def conditional_headers(self, request: "PreparedRequest") -> Dict[str, str]: def _cache_set( self, cache_url: str, - request: "PreparedRequest", - response: "HTTPResponse", - body: Optional[bytes] = None, - expires_time: Optional[int] = None, + request: PreparedRequest, + response: HTTPResponse, + body: bytes | None = None, + expires_time: int | None = None, ) -> None: """ Store the data in the cache. @@ -318,10 +316,10 @@ def _cache_set( def cache_response( self, - request: "PreparedRequest", - response: "HTTPResponse", - body: Optional[bytes] = None, - status_codes: Optional[Collection[int]] = None, + request: PreparedRequest, + response: HTTPResponse, + body: bytes | None = None, + status_codes: Collection[int] | None = None, ) -> None: """ Algorithm for caching requests. @@ -400,7 +398,7 @@ def cache_response( expires_time = max(expires_time, 14 * 86400) - logger.debug("etag object cached for {0} seconds".format(expires_time)) + logger.debug(f"etag object cached for {expires_time} seconds") logger.debug("Caching due to etag") self._cache_set(cache_url, request, response, body, expires_time) @@ -441,7 +439,7 @@ def cache_response( expires_time = None logger.debug( - "Caching b/c of expires header. expires in {0} seconds".format( + "Caching b/c of expires header. expires in {} seconds".format( expires_time ) ) @@ -454,8 +452,8 @@ def cache_response( ) def update_cached_response( - self, request: "PreparedRequest", response: "HTTPResponse" - ) -> "HTTPResponse": + self, request: PreparedRequest, response: HTTPResponse + ) -> HTTPResponse: """On a 304 we will get a new set of headers that we want to update our cached value with, assuming we have one. @@ -480,11 +478,11 @@ def update_cached_response( excluded_headers = ["content-length"] cached_response.headers.update( - dict( - (k, v) + { + k: v for k, v in response.headers.items() # type: ignore[no-untyped-call] if k.lower() not in excluded_headers - ) + } ) # we want a 200 b/c we have content via the cache diff --git a/src/pip/_vendor/cachecontrol/filewrapper.py b/src/pip/_vendor/cachecontrol/filewrapper.py index 472ba600161..25143902a26 100644 --- a/src/pip/_vendor/cachecontrol/filewrapper.py +++ b/src/pip/_vendor/cachecontrol/filewrapper.py @@ -1,16 +1,17 @@ # SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations import mmap from tempfile import NamedTemporaryFile -from typing import TYPE_CHECKING, Any, Callable, Optional +from typing import TYPE_CHECKING, Any, Callable if TYPE_CHECKING: from http.client import HTTPResponse -class CallbackFileWrapper(object): +class CallbackFileWrapper: """ Small wrapper around a fp object which will tee everything read into a buffer, and when that file is closed it will execute a callback with the @@ -30,7 +31,7 @@ class CallbackFileWrapper(object): """ def __init__( - self, fp: "HTTPResponse", callback: Optional[Callable[[bytes], None]] + self, fp: HTTPResponse, callback: Callable[[bytes], None] | None ) -> None: self.__buf = NamedTemporaryFile("rb+", delete=True) self.__fp = fp @@ -93,7 +94,7 @@ def _close(self) -> None: # Important when caching big files. self.__buf.close() - def read(self, amt: Optional[int] = None) -> bytes: + def read(self, amt: int | None = None) -> bytes: data: bytes = self.__fp.read(amt) if data: # We may be dealing with b'', a sign that things are over: diff --git a/src/pip/_vendor/cachecontrol/heuristics.py b/src/pip/_vendor/cachecontrol/heuristics.py index 1e88ada68f2..b9d72ca4ac5 100644 --- a/src/pip/_vendor/cachecontrol/heuristics.py +++ b/src/pip/_vendor/cachecontrol/heuristics.py @@ -1,12 +1,13 @@ # SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations import calendar import time from datetime import datetime, timedelta, timezone from email.utils import formatdate, parsedate, parsedate_tz -from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional +from typing import TYPE_CHECKING, Any, Mapping if TYPE_CHECKING: from pip._vendor.urllib3 import HTTPResponse @@ -14,7 +15,7 @@ TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT" -def expire_after(delta: timedelta, date: Optional[datetime] = None) -> datetime: +def expire_after(delta: timedelta, date: datetime | None = None) -> datetime: date = date or datetime.now(timezone.utc) return date + delta @@ -23,8 +24,8 @@ def datetime_to_header(dt: datetime) -> str: return formatdate(calendar.timegm(dt.timetuple())) -class BaseHeuristic(object): - def warning(self, response: "HTTPResponse") -> Optional[str]: +class BaseHeuristic: + def warning(self, response: HTTPResponse) -> str | None: """ Return a valid 1xx warning header value describing the cache adjustments. @@ -35,7 +36,7 @@ def warning(self, response: "HTTPResponse") -> Optional[str]: """ return '110 - "Response is Stale"' - def update_headers(self, response: "HTTPResponse") -> Dict[str, str]: + def update_headers(self, response: HTTPResponse) -> dict[str, str]: """Update the response headers with any new headers. NOTE: This SHOULD always include some Warning header to @@ -44,7 +45,7 @@ def update_headers(self, response: "HTTPResponse") -> Dict[str, str]: """ return {} - def apply(self, response: "HTTPResponse") -> "HTTPResponse": + def apply(self, response: HTTPResponse) -> HTTPResponse: updated_headers = self.update_headers(response) if updated_headers: @@ -62,7 +63,7 @@ class OneDayCache(BaseHeuristic): future. """ - def update_headers(self, response: "HTTPResponse") -> Dict[str, str]: + def update_headers(self, response: HTTPResponse) -> dict[str, str]: headers = {} if "expires" not in response.headers: @@ -81,11 +82,11 @@ class ExpiresAfter(BaseHeuristic): def __init__(self, **kw: Any) -> None: self.delta = timedelta(**kw) - def update_headers(self, response: "HTTPResponse") -> Dict[str, str]: + def update_headers(self, response: HTTPResponse) -> dict[str, str]: expires = expire_after(self.delta) return {"expires": datetime_to_header(expires), "cache-control": "public"} - def warning(self, response: "HTTPResponse") -> Optional[str]: + def warning(self, response: HTTPResponse) -> str | None: tmpl = "110 - Automatically cached for %s. Response might be stale" return tmpl % self.delta @@ -117,7 +118,7 @@ class LastModified(BaseHeuristic): 501, } - def update_headers(self, resp: "HTTPResponse") -> Dict[str, str]: + def update_headers(self, resp: HTTPResponse) -> dict[str, str]: headers: Mapping[str, str] = resp.headers if "expires" in headers: @@ -149,5 +150,5 @@ def update_headers(self, resp: "HTTPResponse") -> Dict[str, str]: expires = date + freshness_lifetime return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))} - def warning(self, resp: "HTTPResponse") -> Optional[str]: + def warning(self, resp: HTTPResponse) -> str | None: return None diff --git a/src/pip/_vendor/cachecontrol/serialize.py b/src/pip/_vendor/cachecontrol/serialize.py index f21eaea6f39..f9e967c3c34 100644 --- a/src/pip/_vendor/cachecontrol/serialize.py +++ b/src/pip/_vendor/cachecontrol/serialize.py @@ -1,39 +1,27 @@ # SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations -import base64 import io -import json -import pickle -import zlib -from typing import IO, TYPE_CHECKING, Any, Mapping, Optional +from typing import IO, TYPE_CHECKING, Any, Mapping, cast from pip._vendor import msgpack from pip._vendor.requests.structures import CaseInsensitiveDict from pip._vendor.urllib3 import HTTPResponse if TYPE_CHECKING: - from pip._vendor.requests import PreparedRequest, Request + from pip._vendor.requests import PreparedRequest -def _b64_decode_bytes(b: str) -> bytes: - return base64.b64decode(b.encode("ascii")) +class Serializer: + serde_version = "4" - -def _b64_decode_str(s: str) -> str: - return _b64_decode_bytes(s).decode("utf8") - - -_default_body_read = object() - - -class Serializer(object): def dumps( self, - request: "PreparedRequest", + request: PreparedRequest, response: HTTPResponse, - body: Optional[bytes] = None, + body: bytes | None = None, ) -> bytes: response_headers: CaseInsensitiveDict[str] = CaseInsensitiveDict( response.headers @@ -50,7 +38,7 @@ def dumps( data = { "response": { "body": body, # Empty bytestring if body is stored separately - "headers": dict((str(k), str(v)) for k, v in response.headers.items()), # type: ignore[no-untyped-call] + "headers": {str(k): str(v) for k, v in response.headers.items()}, # type: ignore[no-untyped-call] "status": response.status, "version": response.version, "reason": str(response.reason), @@ -69,14 +57,17 @@ def dumps( header_value = str(header_value) data["vary"][header] = header_value - return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)]) + return b",".join([f"cc={self.serde_version}".encode(), self.serialize(data)]) + + def serialize(self, data: dict[str, Any]) -> bytes: + return cast(bytes, msgpack.dumps(data, use_bin_type=True)) def loads( self, - request: "PreparedRequest", + request: PreparedRequest, data: bytes, - body_file: Optional["IO[bytes]"] = None, - ) -> Optional[HTTPResponse]: + body_file: IO[bytes] | None = None, + ) -> HTTPResponse | None: # Short circuit if we've been given an empty set of data if not data: return None @@ -99,7 +90,7 @@ def loads( # Dispatch to the actual load method for the given version try: - return getattr(self, "_loads_v{}".format(verstr))(request, data, body_file) # type: ignore[no-any-return] + return getattr(self, f"_loads_v{verstr}")(request, data, body_file) # type: ignore[no-any-return] except AttributeError: # This is a version we don't have a loads function for, so we'll @@ -108,10 +99,10 @@ def loads( def prepare_response( self, - request: "Request", + request: PreparedRequest, cached: Mapping[str, Any], - body_file: Optional["IO[bytes]"] = None, - ) -> Optional[HTTPResponse]: + body_file: IO[bytes] | None = None, + ) -> HTTPResponse | None: """Verify our vary headers match and construct a real urllib3 HTTPResponse object. """ @@ -139,7 +130,7 @@ def prepare_response( cached["response"]["headers"] = headers try: - body: "IO[bytes]" + body: IO[bytes] if body_file is None: body = io.BytesIO(body_raw) else: @@ -160,71 +151,53 @@ def prepare_response( def _loads_v0( self, - request: "Request", + request: PreparedRequest, data: bytes, - body_file: Optional["IO[bytes]"] = None, + body_file: IO[bytes] | None = None, ) -> None: # The original legacy cache data. This doesn't contain enough # information to construct everything we need, so we'll treat this as # a miss. - return + return None def _loads_v1( self, - request: "Request", + request: PreparedRequest, data: bytes, - body_file: Optional["IO[bytes]"] = None, - ) -> Optional[HTTPResponse]: - try: - cached = pickle.loads(data) - except ValueError: - return None - - return self.prepare_response(request, cached, body_file) + body_file: IO[bytes] | None = None, + ) -> HTTPResponse | None: + # The "v1" pickled cache format. This is no longer supported + # for security reasons, so we treat it as a miss. + return None def _loads_v2( self, - request: "Request", + request: PreparedRequest, data: bytes, - body_file: Optional["IO[bytes]"] = None, - ) -> Optional[HTTPResponse]: - assert body_file is None - try: - cached = json.loads(zlib.decompress(data).decode("utf8")) - except (ValueError, zlib.error): - return None - - # We need to decode the items that we've base64 encoded - cached["response"]["body"] = _b64_decode_bytes(cached["response"]["body"]) - cached["response"]["headers"] = dict( - (_b64_decode_str(k), _b64_decode_str(v)) - for k, v in cached["response"]["headers"].items() - ) - cached["response"]["reason"] = _b64_decode_str(cached["response"]["reason"]) - cached["vary"] = dict( - (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v) - for k, v in cached["vary"].items() - ) - - return self.prepare_response(request, cached, body_file) + body_file: IO[bytes] | None = None, + ) -> HTTPResponse | None: + # The "v2" compressed base64 cache format. + # This has been removed due to age and poor size/performance + # characteristics, so we treat it as a miss. + return None def _loads_v3( self, - request: "Request", + request: PreparedRequest, data: bytes, - body_file: Optional["IO[bytes]"] = None, + body_file: IO[bytes] | None = None, ) -> None: # Due to Python 2 encoding issues, it's impossible to know for sure # exactly how to load v3 entries, thus we'll treat these as a miss so # that they get rewritten out as v4 entries. - return + return None def _loads_v4( self, - request: "Request", + request: PreparedRequest, data: bytes, - body_file: Optional["IO[bytes]"] = None, - ) -> Optional[HTTPResponse]: + body_file: IO[bytes] | None = None, + ) -> HTTPResponse | None: try: cached = msgpack.loads(data, raw=False) except ValueError: diff --git a/src/pip/_vendor/cachecontrol/wrapper.py b/src/pip/_vendor/cachecontrol/wrapper.py index 293e69fe7d4..f618bc363f1 100644 --- a/src/pip/_vendor/cachecontrol/wrapper.py +++ b/src/pip/_vendor/cachecontrol/wrapper.py @@ -1,8 +1,9 @@ # SPDX-FileCopyrightText: 2015 Eric Larson # # SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations -from typing import TYPE_CHECKING, Collection, Optional, Type +from typing import TYPE_CHECKING, Collection from pip._vendor.cachecontrol.adapter import CacheControlAdapter from pip._vendor.cachecontrol.cache import DictCache @@ -17,15 +18,15 @@ def CacheControl( - sess: "requests.Session", - cache: Optional["BaseCache"] = None, + sess: requests.Session, + cache: BaseCache | None = None, cache_etags: bool = True, - serializer: Optional["Serializer"] = None, - heuristic: Optional["BaseHeuristic"] = None, - controller_class: Optional[Type["CacheController"]] = None, - adapter_class: Optional[Type[CacheControlAdapter]] = None, - cacheable_methods: Optional[Collection[str]] = None, -) -> "requests.Session": + serializer: Serializer | None = None, + heuristic: BaseHeuristic | None = None, + controller_class: type[CacheController] | None = None, + adapter_class: type[CacheControlAdapter] | None = None, + cacheable_methods: Collection[str] | None = None, +) -> requests.Session: cache = DictCache() if cache is None else cache adapter_class = adapter_class or CacheControlAdapter adapter = adapter_class( diff --git a/src/pip/_vendor/vendor.txt b/src/pip/_vendor/vendor.txt index d0f4c71cccc..c6809dfd6c3 100644 --- a/src/pip/_vendor/vendor.txt +++ b/src/pip/_vendor/vendor.txt @@ -1,4 +1,4 @@ -CacheControl==0.13.0 # Make sure to update the license in pyproject.toml for this. +CacheControl==0.13.1 # Make sure to update the license in pyproject.toml for this. colorama==0.4.6 distlib==0.3.6 distro==1.8.0 From d5e3f0c4b4d6aa4b432cd5480abb234e2e3332fb Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 6 Sep 2023 11:54:00 -0400 Subject: [PATCH 14/17] Use versionchanged syntax --- docs/html/topics/caching.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/html/topics/caching.md b/docs/html/topics/caching.md index 19bd064a74c..8d6c40f112d 100644 --- a/docs/html/topics/caching.md +++ b/docs/html/topics/caching.md @@ -27,11 +27,12 @@ While this cache attempts to minimize network activity, it does not prevent network access altogether. If you want a local install solution that circumvents accessing PyPI, see {ref}`Installing from local packages`. -In versions prior to 23.2, this cache was stored in a directory called `http` in -the main cache directory (see below for its location). In 23.2 and later, a new -cache format is used, stored in a directory called `http-v2`. If you have -completely switched to newer versions of `pip`, you may wish to delete the old -directory. +```{versionchanged} 23.3 +A new cache format is now used, stored in a directory called `http-v2` (see +below for this directory's location). Previously this cache was stored in a +directory called `http` in the main cache directory. If you have completely +switched to newer versions of `pip`, you may wish to delete the old directory. +``` (wheel-caching)= From b273cee6c5b3572390a3fe9316b2e86661934ce9 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 6 Sep 2023 16:42:38 -0400 Subject: [PATCH 15/17] Combine one entry, explain difference between entries better. --- src/pip/_internal/commands/cache.py | 16 ++++++++-------- tests/functional/test_cache.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py index 83efabe8785..0b3380da006 100644 --- a/src/pip/_internal/commands/cache.py +++ b/src/pip/_internal/commands/cache.py @@ -96,18 +96,19 @@ def get_cache_info(self, options: Values, args: List[Any]) -> None: http_cache_location = self._cache_dir(options, "http-v2") old_http_cache_location = self._cache_dir(options, "http") wheels_cache_location = self._cache_dir(options, "wheels") - http_cache_size = filesystem.format_directory_size(http_cache_location) - old_http_cache_size = filesystem.format_directory_size(old_http_cache_location) + http_cache_size = ( + filesystem.format_directory_size(http_cache_location) + + filesystem.format_directory_size(old_http_cache_location) + ) wheels_cache_size = filesystem.format_directory_size(wheels_cache_location) message = ( textwrap.dedent( """ - Package index page cache location (new): {http_cache_location} - Package index page cache location (old): {old_http_cache_location} - Package index page cache size (new): {http_cache_size} - Package index page cache size (old): {old_http_cache_size} - Number of HTTP files (old+new cache): {num_http_files} + Package index page cache location (pip v23.3+): {http_cache_location} + Package index page cache location (older pips): {old_http_cache_location} + Package index page cache size: {http_cache_size} + Number of HTTP files: {num_http_files} Locally built wheels location: {wheels_cache_location} Locally built wheels size: {wheels_cache_size} Number of locally built wheels: {package_count} @@ -117,7 +118,6 @@ def get_cache_info(self, options: Values, args: List[Any]) -> None: http_cache_location=http_cache_location, old_http_cache_location=old_http_cache_location, http_cache_size=http_cache_size, - old_http_cache_size=old_http_cache_size, num_http_files=num_http_files, wheels_cache_location=wheels_cache_location, package_count=num_packages, diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py index ddafd7332d6..c5d910d453f 100644 --- a/tests/functional/test_cache.py +++ b/tests/functional/test_cache.py @@ -203,7 +203,7 @@ def test_cache_info( ) -> None: result = script.pip("cache", "info") - assert f"Package index page cache location (new): {http_cache_dir}" in result.stdout + assert f"Package index page cache location (pip v23.3+): {http_cache_dir}" in result.stdout assert f"Locally built wheels location: {wheel_cache_dir}" in result.stdout num_wheels = len(wheel_cache_files) assert f"Number of locally built wheels: {num_wheels}" in result.stdout From ab9f6f37f125401f547cd5df84c66d1fb50e4203 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 7 Sep 2023 12:07:50 -0400 Subject: [PATCH 16/17] Fix formatting, combine numbers not strings! Co-authored-by: Pradyun Gedam --- src/pip/_internal/commands/cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py index 0b3380da006..32d1a221d1e 100644 --- a/src/pip/_internal/commands/cache.py +++ b/src/pip/_internal/commands/cache.py @@ -97,8 +97,8 @@ def get_cache_info(self, options: Values, args: List[Any]) -> None: old_http_cache_location = self._cache_dir(options, "http") wheels_cache_location = self._cache_dir(options, "wheels") http_cache_size = ( - filesystem.format_directory_size(http_cache_location) + - filesystem.format_directory_size(old_http_cache_location) + filesystem.format_size(filesystem.directory_size(http_cache_location) + + filesystem.directory_size(old_http_cache_location)) ) wheels_cache_size = filesystem.format_directory_size(wheels_cache_location) From 64d2dc3253e4a81e437931fb9b30d636556461d1 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Tue, 26 Sep 2023 10:28:27 -0400 Subject: [PATCH 17/17] Fix lints --- src/pip/_internal/commands/cache.py | 8 ++++---- tests/functional/test_cache.py | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py index 32d1a221d1e..1f3b5fe142b 100644 --- a/src/pip/_internal/commands/cache.py +++ b/src/pip/_internal/commands/cache.py @@ -96,9 +96,9 @@ def get_cache_info(self, options: Values, args: List[Any]) -> None: http_cache_location = self._cache_dir(options, "http-v2") old_http_cache_location = self._cache_dir(options, "http") wheels_cache_location = self._cache_dir(options, "wheels") - http_cache_size = ( - filesystem.format_size(filesystem.directory_size(http_cache_location) + - filesystem.directory_size(old_http_cache_location)) + http_cache_size = filesystem.format_size( + filesystem.directory_size(http_cache_location) + + filesystem.directory_size(old_http_cache_location) ) wheels_cache_size = filesystem.format_directory_size(wheels_cache_location) @@ -112,7 +112,7 @@ def get_cache_info(self, options: Values, args: List[Any]) -> None: Locally built wheels location: {wheels_cache_location} Locally built wheels size: {wheels_cache_size} Number of locally built wheels: {package_count} - """ + """ # noqa: E501 ) .format( http_cache_location=http_cache_location, diff --git a/tests/functional/test_cache.py b/tests/functional/test_cache.py index c5d910d453f..a744dbbb9bc 100644 --- a/tests/functional/test_cache.py +++ b/tests/functional/test_cache.py @@ -203,7 +203,10 @@ def test_cache_info( ) -> None: result = script.pip("cache", "info") - assert f"Package index page cache location (pip v23.3+): {http_cache_dir}" in result.stdout + assert ( + f"Package index page cache location (pip v23.3+): {http_cache_dir}" + in result.stdout + ) assert f"Locally built wheels location: {wheel_cache_dir}" in result.stdout num_wheels = len(wheel_cache_files) assert f"Number of locally built wheels: {num_wheels}" in result.stdout