Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New HTTP cache with lower memory usage #11143

Merged
merged 23 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
c1ead0a
Switch to new cache format and new cache location.
itamarst May 23, 2022
fa87c9e
Testing for body methods of network cache.
itamarst May 23, 2022
fde34fd
Temporary workaround for https://github.com/ionrock/cachecontrol/issu…
itamarst May 24, 2022
5b7c999
Whitespace fix.
itamarst May 24, 2022
7a609bf
Mypy fix.
itamarst May 24, 2022
3dbba12
Correct name.
itamarst May 24, 2022
bff05e5
Switch to proposed upstream fix.
itamarst May 24, 2022
46f9154
Make sure the file gets closed.
itamarst May 24, 2022
bada631
More accurate type.
itamarst May 24, 2022
ca08c16
Vendor latest version of CacheControl.
pythonspeed Jun 2, 2023
9fb93c4
mypy fix.
pythonspeed Jun 2, 2023
28590a0
Improve documentation of caching and the cache subcommand.
pythonspeed Jun 12, 2023
dcd2d5e
Update CacheControl to 0.13.1.
pythonspeed Jun 12, 2023
577c86c
Merge branch 'main' into 2984-new-cache-lower-memory
itamarst Sep 6, 2023
d5e3f0c
Use versionchanged syntax
pythonspeed Sep 6, 2023
b273cee
Combine one entry, explain difference between entries better.
pythonspeed Sep 6, 2023
ab9f6f3
Fix formatting, combine numbers not strings!
itamarst Sep 7, 2023
3076245
Merge branch 'main' into 2984-new-cache-lower-memory
itamarst Sep 7, 2023
a2d0852
Merge branch 'main' into 2984-new-cache-lower-memory
itamarst Sep 11, 2023
e3cd6ee
Merge branch 'main' into 2984-new-cache-lower-memory
itamarst Sep 12, 2023
b944856
Merge branch 'main' into 2984-new-cache-lower-memory
itamarst Sep 25, 2023
64d2dc3
Fix lints
pythonspeed Sep 26, 2023
cc14055
Merge branch 'main' into 2984-new-cache-lower-memory
itamarst Sep 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions docs/html/topics/caching.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ While this cache attempts to minimize network activity, it does not prevent
network access altogether. If you want a local install solution that
circumvents accessing PyPI, see {ref}`Installing from local packages`.

In versions prior to 23.2, this cache was stored in a directory called `http` in
the main cache directory (see below for its location). In 23.2 and later, a new
cache format is used, stored in a separate directory called `http-v2`. If you no
longer use any version of `pip` older than 23.2, you may wish to delete the old
`http` directory to reclaim disk space.
itamarst marked this conversation as resolved.
Show resolved Hide resolved

(wheel-caching)=

### Locally built wheels
Expand Down Expand Up @@ -124,11 +130,11 @@ The {ref}`pip cache` command can be used to manage pip's cache.

### Removing a single package

`pip cache remove setuptools` removes all wheel files related to setuptools from pip's cache.
`pip cache remove setuptools` removes all wheel files related to setuptools from pip's cache. HTTP cache files are not removed at this time.

### Removing the cache

`pip cache purge` will clear all wheel files from pip's cache.
`pip cache purge` will clear all files from pip's wheel and HTTP caches.

### Listing cached files

Expand Down
1 change: 1 addition & 0 deletions news/2984.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pip uses less memory when caching large packages. This required a new on-disk cache format, which is stored in a new directory ($PIP_CACHE_DIR/http-v2).
2 changes: 1 addition & 1 deletion src/pip/_internal/cli/req_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _build_session(
ssl_context = None

session = PipSession(
cache=os.path.join(cache_dir, "http") if cache_dir else None,
cache=os.path.join(cache_dir, "http-v2") if cache_dir else None,
retries=retries if retries is not None else options.retries,
trusted_hosts=options.trusted_hosts,
index_urls=self._get_index_urls(options),
Expand Down
21 changes: 15 additions & 6 deletions src/pip/_internal/commands/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,25 +93,31 @@ def get_cache_info(self, options: Values, args: List[Any]) -> None:
num_http_files = len(self._find_http_files(options))
num_packages = len(self._find_wheels(options, "*"))

http_cache_location = self._cache_dir(options, "http")
http_cache_location = self._cache_dir(options, "http-v2")
old_http_cache_location = self._cache_dir(options, "http")
wheels_cache_location = self._cache_dir(options, "wheels")
http_cache_size = filesystem.format_directory_size(http_cache_location)
old_http_cache_size = filesystem.format_directory_size(old_http_cache_location)
wheels_cache_size = filesystem.format_directory_size(wheels_cache_location)

message = (
textwrap.dedent(
"""
Package index page cache location: {http_cache_location}
Package index page cache size: {http_cache_size}
Number of HTTP files: {num_http_files}
Package index page cache location (new): {http_cache_location}
Package index page cache location (old): {old_http_cache_location}
Package index page cache size (new): {http_cache_size}
Package index page cache size (old): {old_http_cache_size}
Number of HTTP files (old+new cache): {num_http_files}
itamarst marked this conversation as resolved.
Show resolved Hide resolved
Locally built wheels location: {wheels_cache_location}
Locally built wheels size: {wheels_cache_size}
Number of locally built wheels: {package_count}
"""
)
.format(
http_cache_location=http_cache_location,
old_http_cache_location=old_http_cache_location,
http_cache_size=http_cache_size,
old_http_cache_size=old_http_cache_size,
num_http_files=num_http_files,
wheels_cache_location=wheels_cache_location,
package_count=num_packages,
Expand Down Expand Up @@ -195,8 +201,11 @@ def _cache_dir(self, options: Values, subdir: str) -> str:
return os.path.join(options.cache_dir, subdir)

def _find_http_files(self, options: Values) -> List[str]:
http_dir = self._cache_dir(options, "http")
return filesystem.find_files(http_dir, "*")
old_http_dir = self._cache_dir(options, "http")
new_http_dir = self._cache_dir(options, "http-v2")
return filesystem.find_files(old_http_dir, "*") + filesystem.find_files(
new_http_dir, "*"
)

def _find_wheels(self, options: Values, pattern: str) -> List[str]:
wheel_dir = self._cache_dir(options, "wheels")
Expand Down
33 changes: 25 additions & 8 deletions src/pip/_internal/network/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@

import os
from contextlib import contextmanager
from typing import Generator, Optional
from datetime import datetime
from typing import BinaryIO, Generator, Optional, Union

from pip._vendor.cachecontrol.cache import BaseCache
from pip._vendor.cachecontrol.caches import FileCache
from pip._vendor.cachecontrol.cache import SeparateBodyBaseCache
from pip._vendor.cachecontrol.caches import SeparateBodyFileCache
from pip._vendor.requests.models import Response

from pip._internal.utils.filesystem import adjacent_tmp_file, replace
Expand All @@ -28,7 +29,7 @@ def suppressed_cache_errors() -> Generator[None, None, None]:
pass


class SafeFileCache(BaseCache):
class SafeFileCache(SeparateBodyBaseCache):
"""
A file based cache which is safe to use even when the target directory may
not be accessible or writable.
Expand All @@ -43,7 +44,7 @@ def _get_cache_path(self, name: str) -> str:
# From cachecontrol.caches.file_cache.FileCache._fn, brought into our
# class for backwards-compatibility and to avoid using a non-public
# method.
hashed = FileCache.encode(name)
hashed = SeparateBodyFileCache.encode(name)
parts = list(hashed[:5]) + [hashed]
return os.path.join(self.directory, *parts)

Expand All @@ -53,17 +54,33 @@ def get(self, key: str) -> Optional[bytes]:
with open(path, "rb") as f:
return f.read()

def set(self, key: str, value: bytes, expires: Optional[int] = None) -> None:
path = self._get_cache_path(key)
def _write(self, path: str, data: bytes) -> None:
with suppressed_cache_errors():
ensure_dir(os.path.dirname(path))

with adjacent_tmp_file(path) as f:
f.write(value)
f.write(data)

replace(f.name, path)

def set(
self, key: str, value: bytes, expires: Union[int, datetime, None] = None
) -> None:
path = self._get_cache_path(key)
self._write(path, value)

def delete(self, key: str) -> None:
path = self._get_cache_path(key)
with suppressed_cache_errors():
os.remove(path)
with suppressed_cache_errors():
os.remove(path + ".body")

def get_body(self, key: str) -> Optional[BinaryIO]:
path = self._get_cache_path(key) + ".body"
with suppressed_cache_errors():
return open(path, "rb")

def set_body(self, key: str, body: bytes) -> None:
path = self._get_cache_path(key) + ".body"
self._write(path, body)
1 change: 0 additions & 1 deletion src/pip/_vendor/cachecontrol.pyi

This file was deleted.

18 changes: 14 additions & 4 deletions src/pip/_vendor/cachecontrol/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,21 @@
"""
__author__ = "Eric Larson"
__email__ = "eric@ionrock.org"
__version__ = "0.12.11"
__version__ = "0.13.1"

from .wrapper import CacheControl
from .adapter import CacheControlAdapter
from .controller import CacheController
from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.controller import CacheController
from pip._vendor.cachecontrol.wrapper import CacheControl

__all__ = [
"__author__",
"__email__",
"__version__",
"CacheControlAdapter",
"CacheController",
"CacheControl",
]

import logging

logging.getLogger(__name__).addHandler(logging.NullHandler())
25 changes: 17 additions & 8 deletions src/pip/_vendor/cachecontrol/_cmd.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,49 @@
# SPDX-FileCopyrightText: 2015 Eric Larson
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

import logging
from argparse import ArgumentParser
from typing import TYPE_CHECKING

from pip._vendor import requests

from pip._vendor.cachecontrol.adapter import CacheControlAdapter
from pip._vendor.cachecontrol.cache import DictCache
from pip._vendor.cachecontrol.controller import logger

from argparse import ArgumentParser
if TYPE_CHECKING:
from argparse import Namespace

from pip._vendor.cachecontrol.controller import CacheController

def setup_logging():

def setup_logging() -> None:
logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
logger.addHandler(handler)


def get_session():
def get_session() -> requests.Session:
adapter = CacheControlAdapter(
DictCache(), cache_etags=True, serializer=None, heuristic=None
)
sess = requests.Session()
sess.mount("http://", adapter)
sess.mount("https://", adapter)

sess.cache_controller = adapter.controller
sess.cache_controller = adapter.controller # type: ignore[attr-defined]
return sess


def get_args():
def get_args() -> Namespace:
parser = ArgumentParser()
parser.add_argument("url", help="The URL to try and cache")
return parser.parse_args()


def main(args=None):
def main() -> None:
args = get_args()
sess = get_session()

Expand All @@ -48,10 +54,13 @@ def main(args=None):
setup_logging()

# try setting the cache
sess.cache_controller.cache_response(resp.request, resp.raw)
cache_controller: CacheController = (
sess.cache_controller # type: ignore[attr-defined]
)
cache_controller.cache_response(resp.request, resp.raw)

# Now try to get it
if sess.cache_controller.cached_request(resp.request):
if cache_controller.cached_request(resp.request):
print("Cached!")
else:
print("Not cached :(")
Expand Down
Loading