Skip to content

Commit

Permalink
cache "concrete" dists by Distribution instead of InstallRequirement
Browse files Browse the repository at this point in the history
  • Loading branch information
cosmicexplorer committed Aug 13, 2024
1 parent e98cc5c commit ad93d54
Show file tree
Hide file tree
Showing 15 changed files with 231 additions and 72 deletions.
1 change: 1 addition & 0 deletions news/12863.trivial.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Cache "concrete" dists by ``Distribution`` instead of ``InstallRequirement``.
5 changes: 5 additions & 0 deletions src/pip/_internal/distributions/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pip._internal.distributions.base import AbstractDistribution
from pip._internal.distributions.installed import InstalledDistribution
from pip._internal.distributions.sdist import SourceDistribution
from pip._internal.distributions.wheel import WheelDistribution
from pip._internal.req.req_install import InstallRequirement
Expand All @@ -8,6 +9,10 @@ def make_distribution_for_install_requirement(
install_req: InstallRequirement,
) -> AbstractDistribution:
"""Returns a Distribution for the given InstallRequirement"""
# Only pre-installed requirements will have a .satisfied_by dist.
if install_req.satisfied_by:
return InstalledDistribution(install_req)

# Editable requirements will always be source distributions. They use the
# legacy logic until we create a modern standard for them.
if install_req.editable:
Expand Down
19 changes: 16 additions & 3 deletions src/pip/_internal/distributions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,17 @@ def build_tracker_id(self) -> Optional[str]:
If None, then this dist has no work to do in the build tracker, and
``.prepare_distribution_metadata()`` will not be called."""
raise NotImplementedError()
...

@abc.abstractmethod
def get_metadata_distribution(self) -> BaseDistribution:
raise NotImplementedError()
"""Generate a concrete ``BaseDistribution`` instance for this artifact.
The implementation should also cache the result with
``self.req.cache_concrete_dist()`` so the distribution is available to other
users of the ``InstallRequirement``. This method is not called within the build
tracker context, so it should not identify any new setup requirements."""
...

@abc.abstractmethod
def prepare_distribution_metadata(
Expand All @@ -50,4 +56,11 @@ def prepare_distribution_metadata(
build_isolation: bool,
check_build_deps: bool,
) -> None:
raise NotImplementedError()
"""Generate the information necessary to extract metadata from the artifact.
This method will be executed within the context of ``BuildTracker#track()``, so
it needs to fully identify any setup requirements so they can be added to the
same active set of tracked builds, while ``.get_metadata_distribution()`` takes
care of generating and caching the ``BaseDistribution`` to expose to the rest of
the resolve."""
...
14 changes: 9 additions & 5 deletions src/pip/_internal/distributions/installed.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from typing import Optional
from typing import TYPE_CHECKING, Optional

from pip._internal.distributions.base import AbstractDistribution
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution

if TYPE_CHECKING:
from pip._internal.index.package_finder import PackageFinder


class InstalledDistribution(AbstractDistribution):
"""Represents an installed package.
Expand All @@ -17,12 +19,14 @@ def build_tracker_id(self) -> Optional[str]:
return None

def get_metadata_distribution(self) -> BaseDistribution:
assert self.req.satisfied_by is not None, "not actually installed"
return self.req.satisfied_by
dist = self.req.satisfied_by
assert dist is not None, "not actually installed"
self.req.cache_concrete_dist(dist)
return dist

def prepare_distribution_metadata(
self,
finder: PackageFinder,
finder: "PackageFinder",
build_isolation: bool,
check_build_deps: bool,
) -> None:
Expand Down
20 changes: 15 additions & 5 deletions src/pip/_internal/distributions/sdist.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import logging
from typing import TYPE_CHECKING, Iterable, Optional, Set, Tuple
from typing import TYPE_CHECKING, Iterable, Set, Tuple

from pip._internal.build_env import BuildEnvironment
from pip._internal.distributions.base import AbstractDistribution
from pip._internal.exceptions import InstallationError
from pip._internal.metadata import BaseDistribution
from pip._internal.metadata import BaseDistribution, get_directory_distribution
from pip._internal.utils.subprocess import runner_with_spinner_message

if TYPE_CHECKING:
Expand All @@ -21,13 +21,19 @@ class SourceDistribution(AbstractDistribution):
"""

@property
def build_tracker_id(self) -> Optional[str]:
def build_tracker_id(self) -> str:
"""Identify this requirement uniquely by its link."""
assert self.req.link
return self.req.link.url_without_fragment

def get_metadata_distribution(self) -> BaseDistribution:
return self.req.get_dist()
assert (
self.req.metadata_directory
), "Set as part of .prepare_distribution_metadata()"
dist = get_directory_distribution(self.req.metadata_directory)
self.req.cache_concrete_dist(dist)
self.req.validate_sdist_metadata()
return dist

def prepare_distribution_metadata(
self,
Expand Down Expand Up @@ -66,7 +72,11 @@ def prepare_distribution_metadata(
self._raise_conflicts("the backend dependencies", conflicting)
if missing:
self._raise_missing_reqs(missing)
self.req.prepare_metadata()

# NB: we must still call .cache_concrete_dist() and .validate_sdist_metadata()
# before the InstallRequirement itself has been updated with the metadata from
# this directory!
self.req.prepare_metadata_directory()

def _prepare_build_backend(self, finder: "PackageFinder") -> None:
# Isolate in a BuildEnvironment and install the build-time
Expand Down
4 changes: 3 additions & 1 deletion src/pip/_internal/distributions/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ def get_metadata_distribution(self) -> BaseDistribution:
assert self.req.local_file_path, "Set as part of preparation during download"
assert self.req.name, "Wheels are never unnamed"
wheel = FilesystemWheel(self.req.local_file_path)
return get_wheel_distribution(wheel, canonicalize_name(self.req.name))
dist = get_wheel_distribution(wheel, canonicalize_name(self.req.name))
self.req.cache_concrete_dist(dist)
return dist

def prepare_distribution_metadata(
self,
Expand Down
21 changes: 21 additions & 0 deletions src/pip/_internal/metadata/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,15 @@ class RequiresEntry(NamedTuple):


class BaseDistribution(Protocol):
@property
def is_concrete(self) -> bool:
"""Whether the distribution really exists somewhere on disk.
If this is false, it has been synthesized from metadata, e.g. via
``.from_metadata_file_contents()``, or ``.from_wheel()`` against
a ``MemoryWheel``."""
raise NotImplementedError()

@classmethod
def from_directory(cls, directory: str) -> "BaseDistribution":
"""Load the distribution from a metadata directory.
Expand Down Expand Up @@ -667,6 +676,10 @@ def iter_installed_distributions(
class Wheel(Protocol):
location: str

@property
def is_concrete(self) -> bool:
raise NotImplementedError()

def as_zipfile(self) -> zipfile.ZipFile:
raise NotImplementedError()

Expand All @@ -675,6 +688,10 @@ class FilesystemWheel(Wheel):
def __init__(self, location: str) -> None:
self.location = location

@property
def is_concrete(self) -> bool:
return True

def as_zipfile(self) -> zipfile.ZipFile:
return zipfile.ZipFile(self.location, allowZip64=True)

Expand All @@ -684,5 +701,9 @@ def __init__(self, location: str, stream: IO[bytes]) -> None:
self.location = location
self.stream = stream

@property
def is_concrete(self) -> bool:
return False

def as_zipfile(self) -> zipfile.ZipFile:
return zipfile.ZipFile(self.stream, allowZip64=True)
19 changes: 16 additions & 3 deletions src/pip/_internal/metadata/importlib/_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,16 +102,22 @@ def __init__(
dist: importlib.metadata.Distribution,
info_location: Optional[BasePath],
installed_location: Optional[BasePath],
concrete: bool,
) -> None:
self._dist = dist
self._info_location = info_location
self._installed_location = installed_location
self._concrete = concrete

@property
def is_concrete(self) -> bool:
return self._concrete

@classmethod
def from_directory(cls, directory: str) -> BaseDistribution:
info_location = pathlib.Path(directory)
dist = importlib.metadata.Distribution.at(info_location)
return cls(dist, info_location, info_location.parent)
return cls(dist, info_location, info_location.parent, concrete=True)

@classmethod
def from_metadata_file_contents(
Expand All @@ -128,7 +134,7 @@ def from_metadata_file_contents(
metadata_path.write_bytes(metadata_contents)
# Construct dist pointing to the newly created directory.
dist = importlib.metadata.Distribution.at(metadata_path.parent)
return cls(dist, metadata_path.parent, None)
return cls(dist, metadata_path.parent, None, concrete=False)

@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
Expand All @@ -137,7 +143,14 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
dist = WheelDistribution.from_zipfile(zf, name, wheel.location)
except zipfile.BadZipFile as e:
raise InvalidWheel(wheel.location, name) from e
return cls(dist, dist.info_location, pathlib.PurePosixPath(wheel.location))
except UnsupportedWheel as e:
raise UnsupportedWheel(f"{name} has an invalid wheel, {e}")
return cls(
dist,
dist.info_location,
pathlib.PurePosixPath(wheel.location),
concrete=wheel.is_concrete,
)

@property
def location(self) -> Optional[str]:
Expand Down
8 changes: 4 additions & 4 deletions src/pip/_internal/metadata/importlib/_envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def find(self, location: str) -> Iterator[BaseDistribution]:
installed_location: Optional[BasePath] = None
else:
installed_location = info_location.parent
yield Distribution(dist, info_location, installed_location)
yield Distribution(dist, info_location, installed_location, concrete=True)

def find_linked(self, location: str) -> Iterator[BaseDistribution]:
"""Read location in egg-link files and return distributions in there.
Expand All @@ -104,7 +104,7 @@ def find_linked(self, location: str) -> Iterator[BaseDistribution]:
continue
target_location = str(path.joinpath(target_rel))
for dist, info_location in self._find_impl(target_location):
yield Distribution(dist, info_location, path)
yield Distribution(dist, info_location, path, concrete=True)

def _find_eggs_in_dir(self, location: str) -> Iterator[BaseDistribution]:
from pip._vendor.pkg_resources import find_distributions
Expand All @@ -116,7 +116,7 @@ def _find_eggs_in_dir(self, location: str) -> Iterator[BaseDistribution]:
if not entry.name.endswith(".egg"):
continue
for dist in find_distributions(entry.path):
yield legacy.Distribution(dist)
yield legacy.Distribution(dist, concrete=True)

def _find_eggs_in_zip(self, location: str) -> Iterator[BaseDistribution]:
from pip._vendor.pkg_resources import find_eggs_in_zip
Expand All @@ -128,7 +128,7 @@ def _find_eggs_in_zip(self, location: str) -> Iterator[BaseDistribution]:
except zipimport.ZipImportError:
return
for dist in find_eggs_in_zip(importer, location):
yield legacy.Distribution(dist)
yield legacy.Distribution(dist, concrete=True)

def find_eggs(self, location: str) -> Iterator[BaseDistribution]:
"""Find eggs in a location.
Expand Down
15 changes: 10 additions & 5 deletions src/pip/_internal/metadata/pkg_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ def run_script(self, script_name: str, namespace: str) -> None:


class Distribution(BaseDistribution):
def __init__(self, dist: pkg_resources.Distribution) -> None:
def __init__(self, dist: pkg_resources.Distribution, concrete: bool) -> None:
self._dist = dist
self._concrete = concrete
# This is populated lazily, to avoid loading metadata for all possible
# distributions eagerly.
self.__extra_mapping: Optional[Mapping[NormalizedName, str]] = None
Expand All @@ -96,6 +97,10 @@ def _extra_mapping(self) -> Mapping[NormalizedName, str]:

return self.__extra_mapping

@property
def is_concrete(self) -> bool:
return self._concrete

@classmethod
def from_directory(cls, directory: str) -> BaseDistribution:
dist_dir = directory.rstrip(os.sep)
Expand All @@ -114,7 +119,7 @@ def from_directory(cls, directory: str) -> BaseDistribution:
dist_name = os.path.splitext(dist_dir_name)[0].split("-")[0]

dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata)
return cls(dist)
return cls(dist, concrete=True)

@classmethod
def from_metadata_file_contents(
Expand All @@ -131,7 +136,7 @@ def from_metadata_file_contents(
metadata=InMemoryMetadata(metadata_dict, filename),
project_name=project_name,
)
return cls(dist)
return cls(dist, concrete=False)

@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
Expand All @@ -152,7 +157,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
metadata=InMemoryMetadata(metadata_dict, wheel.location),
project_name=name,
)
return cls(dist)
return cls(dist, concrete=wheel.is_concrete)

@property
def location(self) -> Optional[str]:
Expand Down Expand Up @@ -264,7 +269,7 @@ def from_paths(cls, paths: Optional[List[str]]) -> BaseEnvironment:

def _iter_distributions(self) -> Iterator[BaseDistribution]:
for dist in self._ws:
yield Distribution(dist)
yield Distribution(dist, concrete=True)

def _search_distribution(self, name: str) -> Optional[BaseDistribution]:
"""Find a distribution matching the ``name`` in the environment.
Expand Down
Loading

0 comments on commit ad93d54

Please sign in to comment.