Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cache metadata lookups for sdists and lazy wheels #12256

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/12186.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Avoid downloading any dists in ``install --dry-run`` if PEP 658 ``.metadata`` files or lazy wheels are available.
1 change: 1 addition & 0 deletions news/12256.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Cache computed metadata from sdists and lazy wheels in ``~/.cache/pip/link-metadata`` when ``--use-feature=metadata-cache`` is enabled.
1 change: 1 addition & 0 deletions news/12863.trivial.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Cache "concrete" dists by ``Distribution`` instead of ``InstallRequirement``.
1 change: 1 addition & 0 deletions news/12871.trivial.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refactor much of ``RequirementPreparer`` to avoid duplicated code paths for metadata-only requirements.
118 changes: 102 additions & 16 deletions src/pip/_internal/cache.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""Cache Management
"""

import abc
import hashlib
import json
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Optional
from typing import Dict, Iterator, List, Optional, Tuple

from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
from pip._vendor.packaging.utils import canonicalize_name
Expand All @@ -15,21 +17,71 @@
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.req.req_install import InstallRequirement
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
from pip._internal.utils.urls import path_to_url
from pip._internal.vcs import vcs

logger = logging.getLogger(__name__)

_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE)

ORIGIN_JSON_NAME = "origin.json"


def _contains_egg_info(s: str) -> bool:
    """Report whether ``s`` contains something shaped like an egg_info name.

    The check is a regex *search* (not a full match), so a ``name-version``
    fragment anywhere inside the string is enough.

    :param s: The string to inspect. E.g. foo-2.1
    """
    found = _egg_info_re.search(s)
    return found is not None


def should_cache(
    req: InstallRequirement,
) -> bool:
    """
    Tell whether the wheel built for ``req`` may be kept in the persistent
    wheel cache.

    This assumes the wheel cache is available and that _should_build() has
    already determined a wheel needs to be built.
    """
    link = req.link

    # Nothing to cache without a link, and an existing wheel is not a build
    # product.
    if not link or link.is_wheel:
        return False

    # never cache editable requirements (nor ones lacking a source dir)
    if req.editable or not req.source_dir:
        return False

    if link.is_vcs:
        # VCS checkout. Only cacheable when it is pinned to an immutable
        # commit hash.
        backend = vcs.get_backend_for_scheme(link.scheme)
        assert backend
        return bool(backend.is_immutable_rev_checkout(link.url, req.source_dir))

    # Any other link is cacheable only if its base name looks like
    # "name-version".
    stem, _ext = link.splitext()
    return _contains_egg_info(stem)


def _hash_dict(d: Dict[str, str]) -> str:
    """Compute a reproducible sha224 hex digest for a dictionary.

    The dictionary is serialized to compact JSON with sorted keys and
    ASCII-only output, so equal dictionaries hash identically regardless of
    insertion order.
    """
    serialized = json.dumps(
        d,
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=True,
    )
    digest = hashlib.sha224()
    digest.update(serialized.encode("ascii"))
    return digest.hexdigest()


class Cache:
class Cache(abc.ABC):
"""An abstract class - provides cache directories for data from links

:param cache_dir: The root of the cache.
Expand Down Expand Up @@ -73,20 +125,28 @@ def _get_cache_path_parts(self, link: Link) -> List[str]:

return parts

def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]:
can_not_cache = not self.cache_dir or not canonical_package_name or not link
if can_not_cache:
return []
@abc.abstractmethod
def get_path_for_link(self, link: Link) -> str:
"""Return a directory to store cached items in for link."""
...

def cache_path(self, link: Link) -> Path:
    """Return the cache directory for ``link`` as a ``pathlib.Path``."""
    directory = self.get_path_for_link(link)
    return Path(directory)

path = self.get_path_for_link(link)
if os.path.isdir(path):
return [(candidate, path) for candidate in os.listdir(path)]
return []

class LinkMetadataCache(Cache):
"""Persistently store the metadata of dists found at each link."""

def get_path_for_link(self, link: Link) -> str:
"""Return a directory to store cached items in for link."""
raise NotImplementedError()
parts = self._get_cache_path_parts(link)
assert self.cache_dir
return os.path.join(self.cache_dir, "link-metadata", *parts)


class WheelCacheBase(Cache):
"""Specializations to the cache concept for wheels."""

@abc.abstractmethod
def get(
self,
link: Link,
Expand All @@ -96,10 +156,27 @@ def get(
"""Returns a link to a cached item if it exists, otherwise returns the
passed link.
"""
raise NotImplementedError()
...

def _can_cache(self, link: Link, canonical_package_name: str) -> bool:
    """Return True only when a cache dir, a package name and a link are all set."""
    if not self.cache_dir:
        return False
    if not canonical_package_name:
        return False
    return bool(link)

def _get_candidates(
self, link: Link, canonical_package_name: str
) -> Iterator[Tuple[str, str]]:
if not self._can_cache(link, canonical_package_name):
return

path = self.get_path_for_link(link)
if not os.path.isdir(path):
return

class SimpleWheelCache(Cache):
for candidate in os.scandir(path):
if candidate.is_file():
yield (candidate.name, path)


class SimpleWheelCache(WheelCacheBase):
"""A cache of wheels for future installs."""

def __init__(self, cache_dir: str) -> None:
Expand Down Expand Up @@ -131,7 +208,7 @@ def get(
package_name: Optional[str],
supported_tags: List[Tag],
) -> Link:
candidates = []
candidates: List[Tuple[int, str, str]] = []

if not package_name:
return link
Expand Down Expand Up @@ -205,7 +282,7 @@ def __init__(
)


class WheelCache(Cache):
class WheelCache(WheelCacheBase):
"""Wraps EphemWheelCache and SimpleWheelCache into a single Cache

This Cache allows for graceful degradation, using the ephem wheel cache
Expand All @@ -223,6 +300,15 @@ def get_path_for_link(self, link: Link) -> str:
def get_ephem_path_for_link(self, link: Link) -> str:
return self._ephem_cache.get_path_for_link(link)

def resolve_cache_dir(self, req: InstallRequirement) -> str:
    """Pick the directory in which the built or downloaded wheel for ``req``
    should be stored.

    The persistent cache path is used when a cache dir is configured and
    ``should_cache(req)`` approves; otherwise the ephemeral cache path is
    returned. ``req.link`` must be set.
    """
    has_persistent_cache = bool(self.cache_dir)
    assert req.link, req
    if not (has_persistent_cache and should_cache(req)):
        return self.get_ephem_path_for_link(req.link)
    return self.get_path_for_link(req.link)

def get(
self,
link: Link,
Expand Down
2 changes: 2 additions & 0 deletions src/pip/_internal/cli/cmdoptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,8 @@ def check_list_path_option(options: Values) -> None:
default=[],
choices=[
"fast-deps",
"metadata-cache",
"truststore",
]
+ ALWAYS_ENABLED_FEATURES,
help="Enable new functionality, that may be backward incompatible.",
Expand Down
13 changes: 12 additions & 1 deletion src/pip/_internal/cli/req_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from optparse import Values
from typing import Any, List, Optional, Tuple

from pip._internal.cache import WheelCache
from pip._internal.cache import LinkMetadataCache, WheelCache
from pip._internal.cli import cmdoptions
from pip._internal.cli.index_command import IndexGroupCommand
from pip._internal.cli.index_command import SessionCommandMixin as SessionCommandMixin
Expand Down Expand Up @@ -127,6 +127,16 @@ def make_requirement_preparer(
"fast-deps has no effect when used with the legacy resolver."
)

if options.cache_dir and "metadata-cache" in options.features_enabled:
logger.warning(
"pip is using a local cache for metadata information. "
"This experimental feature is enabled through "
"--use-feature=metadata-cache and it is not ready for "
"production."
)
metadata_cache = LinkMetadataCache(options.cache_dir)
else:
metadata_cache = None
return RequirementPreparer(
build_dir=temp_build_dir_path,
src_dir=options.src_dir,
Expand All @@ -142,6 +152,7 @@ def make_requirement_preparer(
lazy_wheel=lazy_wheel,
verbosity=verbosity,
legacy_resolver=legacy_resolver,
metadata_cache=metadata_cache,
)

@classmethod
Expand Down
5 changes: 3 additions & 2 deletions src/pip/_internal/commands/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ def run(self, options: Values, args: List[str]) -> int:
self.trace_basic_info(finder)

requirement_set = resolver.resolve(reqs, check_supported_wheels=True)
preparer.finalize_linked_requirements(
requirement_set.requirements.values(), require_dist_files=True
)

downloaded: List[str] = []
for req in requirement_set.requirements.values():
Expand All @@ -138,8 +141,6 @@ def run(self, options: Values, args: List[str]) -> int:
preparer.save_linked_requirement(req)
downloaded.append(req.name)

preparer.prepare_linked_requirements_more(requirement_set.requirements.values())

if downloaded:
write_output("Successfully downloaded %s", " ".join(downloaded))

Expand Down
7 changes: 6 additions & 1 deletion src/pip/_internal/commands/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ def add_options(self) -> None:
help=(
"Don't actually install anything, just print what would be. "
"Can be used in combination with --ignore-installed "
"to 'resolve' the requirements."
"to 'resolve' the requirements. If package metadata is available "
"or cached, --dry-run also avoids downloading the dependency at all."
),
)
self.cmd_opts.add_option(
Expand Down Expand Up @@ -379,6 +380,10 @@ def run(self, options: Values, args: List[str]) -> int:
requirement_set = resolver.resolve(
reqs, check_supported_wheels=not options.target_dir
)
preparer.finalize_linked_requirements(
requirement_set.requirements.values(),
require_dist_files=not options.dry_run,
)

if options.json_report_file:
report = InstallationReport(requirement_set.requirements_to_install)
Expand Down
5 changes: 3 additions & 2 deletions src/pip/_internal/commands/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ def run(self, options: Values, args: List[str]) -> int:
self.trace_basic_info(finder)

requirement_set = resolver.resolve(reqs, check_supported_wheels=True)
preparer.finalize_linked_requirements(
requirement_set.requirements.values(), require_dist_files=True
)

reqs_to_build: List[InstallRequirement] = []
for req in requirement_set.requirements.values():
Expand All @@ -153,8 +156,6 @@ def run(self, options: Values, args: List[str]) -> int:
elif should_build_for_wheel_command(req):
reqs_to_build.append(req)

preparer.prepare_linked_requirements_more(requirement_set.requirements.values())

# build wheels
build_successes, build_failures = build(
reqs_to_build,
Expand Down
14 changes: 13 additions & 1 deletion src/pip/_internal/distributions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pip._internal.distributions.base import AbstractDistribution
from pip._internal.distributions.installed import InstalledDistribution
from pip._internal.distributions.sdist import SourceDistribution
from pip._internal.distributions.wheel import WheelDistribution
from pip._internal.req.req_install import InstallRequirement
Expand All @@ -7,7 +8,18 @@
def make_distribution_for_install_requirement(
install_req: InstallRequirement,
) -> AbstractDistribution:
"""Returns a Distribution for the given InstallRequirement"""
"""Returns an AbstractDistribution for the given InstallRequirement.

As AbstractDistribution only covers installable artifacts, this method may only be
invoked at the conclusion of a resolve, when the RequirementPreparer has downloaded
the file corresponding to the resolved dist. Commands which intend to consume
metadata-only resolves without downloading should not call this method or
consume AbstractDistribution objects.
"""
# Only pre-installed requirements will have a .satisfied_by dist.
if install_req.satisfied_by:
return InstalledDistribution(install_req)

# Editable requirements will always be source distributions. They use the
# legacy logic until we create a modern standard for them.
if install_req.editable:
Expand Down
19 changes: 16 additions & 3 deletions src/pip/_internal/distributions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,17 @@ def build_tracker_id(self) -> Optional[str]:

If None, then this dist has no work to do in the build tracker, and
``.prepare_distribution_metadata()`` will not be called."""
raise NotImplementedError()
...

@abc.abstractmethod
def get_metadata_distribution(self) -> BaseDistribution:
raise NotImplementedError()
"""Generate a concrete ``BaseDistribution`` instance for this artifact.

The implementation should also cache the result with
``self.req.cache_concrete_dist()`` so the distribution is available to other
users of the ``InstallRequirement``. This method is not called within the build
tracker context, so it should not identify any new setup requirements."""
...

@abc.abstractmethod
def prepare_distribution_metadata(
Expand All @@ -50,4 +56,11 @@ def prepare_distribution_metadata(
build_isolation: bool,
check_build_deps: bool,
) -> None:
raise NotImplementedError()
"""Generate the information necessary to extract metadata from the artifact.

This method will be executed within the context of ``BuildTracker#track()``, so
it needs to fully identify any setup requirements so they can be added to the
same active set of tracked builds, while ``.get_metadata_distribution()`` takes
care of generating and caching the ``BaseDistribution`` to expose to the rest of
the resolve."""
...
14 changes: 9 additions & 5 deletions src/pip/_internal/distributions/installed.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from typing import Optional
from typing import TYPE_CHECKING, Optional

from pip._internal.distributions.base import AbstractDistribution
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution

if TYPE_CHECKING:
from pip._internal.index.package_finder import PackageFinder


class InstalledDistribution(AbstractDistribution):
"""Represents an installed package.
Expand All @@ -17,12 +19,14 @@ def build_tracker_id(self) -> Optional[str]:
return None

def get_metadata_distribution(self) -> BaseDistribution:
assert self.req.satisfied_by is not None, "not actually installed"
return self.req.satisfied_by
dist = self.req.satisfied_by
assert dist is not None, "not actually installed"
self.req.cache_concrete_dist(dist)
return dist

def prepare_distribution_metadata(
self,
finder: PackageFinder,
finder: "PackageFinder",
build_isolation: bool,
check_build_deps: bool,
) -> None:
Expand Down
Loading