From a67c59fe8b95e3f6e50d7538b0f6d4cf674756a7 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sun, 9 Jan 2022 00:09:17 -0500 Subject: [PATCH] move the --report implementation into resolvelib --- src/pip/_internal/commands/download.py | 172 +-------------- .../resolution/resolvelib/reporter.py | 203 +++++++++++++++++- .../resolution/resolvelib/resolver.py | 7 + 3 files changed, 216 insertions(+), 166 deletions(-) diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py index de9eee6a70d..7b5f805de71 100644 --- a/src/pip/_internal/commands/download.py +++ b/src/pip/_internal/commands/download.py @@ -1,91 +1,23 @@ import json import logging import os -from dataclasses import dataclass, field from optparse import Values -from typing import Any, Dict, List, Optional, Tuple - -from pip._vendor.packaging.requirements import Requirement -from pip._vendor.packaging.specifiers import SpecifierSet +from typing import List from pip._internal.cli import cmdoptions from pip._internal.cli.cmdoptions import make_target_python from pip._internal.cli.req_command import RequirementCommand, with_cleanup from pip._internal.cli.status_codes import SUCCESS from pip._internal.exceptions import CommandError -from pip._internal.models.link import LinkWithSource, URLDownloadInfo -from pip._internal.req.req_install import produce_exact_version_specifier from pip._internal.req.req_tracker import get_requirement_tracker from pip._internal.resolution.base import RequirementSetWithCandidates -from pip._internal.resolution.resolvelib.candidates import ( - LinkCandidate, - RequiresPythonCandidate, -) -from pip._internal.resolution.resolvelib.requirements import ( - ExplicitRequirement, - RequiresPythonRequirement, -) +from pip._internal.resolution.resolvelib.reporter import ResolutionResult from pip._internal.utils.misc import ensure_dir, normalize_path, write_output from pip._internal.utils.temp_dir import 
TempDirectory logger = logging.getLogger(__name__) -@dataclass(frozen=True) -class ResolvedCandidate: - """Coalesce all the information pip's resolver retains about an - installation candidate.""" - - req: Requirement - download_info: URLDownloadInfo - dependencies: Tuple[Requirement, ...] - requires_python: Optional[SpecifierSet] - - def as_json(self) -> Dict[str, Any]: - """Return a JSON-serializable representation of this install candidate.""" - return { - "requirement": str(self.req), - "download_info": self.download_info.as_json(), - "dependencies": {dep.name: str(dep) for dep in self.dependencies}, - "requires_python": str(self.requires_python) - if self.requires_python - else None, - } - - -@dataclass -class ResolutionResult: - """The inputs and outputs of a pip internal resolve process.""" - - input_requirements: Tuple[str, ...] - python_version: Optional[SpecifierSet] = None - candidates: Dict[str, ResolvedCandidate] = field(default_factory=dict) - - def as_basic_log(self, output_json_path: str) -> str: - """Generate a summary of the detailed JSON report produced with --report.""" - inputs = " ".join(f"'{req}'" for req in self.input_requirements) - resolved = " ".join(f"'{info.req}'" for info in self.candidates.values()) - return "\n".join( - [ - f"Python version: '{self.python_version}'", - f"Input requirements: {inputs}", - f"Resolution: {resolved}", - f"JSON report written to '{output_json_path}'.", - ] - ) - - def as_json(self) -> Dict[str, Any]: - """Return a JSON-serializable representation of the resolve process.""" - return { - "experimental": True, - "input_requirements": [str(req) for req in self.input_requirements], - "python_version": str(self.python_version), - "candidates": { - name: info.as_json() for name, info in self.candidates.items() - }, - } - - class DownloadCommand(RequirementCommand): """ Download packages from: @@ -220,13 +152,6 @@ def run(self, options: Values, args: List[str]) -> int: self.trace_basic_info(finder) - # TODO: for 
performance, try to decouple extracting sdist metadata from - # actually building the sdist. See https://github.com/pypa/pip/issues/8929. - # As mentioned in that issue, PEP 658 support on PyPI would address many cases, - # but it would drastically improve performance for many existing packages if we - # attempted to extract PKG-INFO or .egg-info from non-wheel files, falling back - # to the slower setup.py invocation if not found. LazyZipOverHTTP and - # MemoryWheel already implement such a hack for wheel files specifically. requirement_set = resolver.resolve(reqs, check_supported_wheels=True) if not options.dry_run: @@ -239,6 +164,8 @@ def run(self, options: Values, args: List[str]) -> int: if downloaded: write_output("Successfully downloaded %s", " ".join(downloaded)) + # The rest of this method pertains to generating the ResolutionResult with + # --report. if not options.json_report_file: return SUCCESS if not isinstance(requirement_set, RequirementSetWithCandidates): @@ -249,98 +176,13 @@ def run(self, options: Values, args: List[str]) -> int: "so `pip download --report` cannot be used with it. " ) - # Reconstruct the input requirements provided to the resolve. - input_requirements: List[str] = [] - for ireq in reqs: - if ireq.req: - # If the initial requirement string contained a url (retained in - # InstallRequirement.link), add it back to the requirement string - # included in the JSON report. - if ireq.link: - req_string = f"{ireq.req}@{ireq.link.url}" - else: - req_string = str(ireq.req) - else: - assert ireq.link - req_string = ireq.link.url - - input_requirements.append(req_string) - - # Scan all the elements of the resulting `RequirementSet` and map it back to all - # the install candidates preserved by `RequirementSetWithCandidates`. 
- resolution_result = ResolutionResult( - input_requirements=tuple(input_requirements) + resolution_result = ResolutionResult.generate_resolve_report( + reqs, requirement_set ) - for candidate in requirement_set.candidates.mapping.values(): - # This will occur for the python version requirement, for example. - if candidate.name not in requirement_set.requirements: - if isinstance(candidate, RequiresPythonCandidate): - assert resolution_result.python_version is None - resolution_result.python_version = produce_exact_version_specifier( - str(candidate.version) - ) - continue - raise TypeError( - f"unknown candidate not found in requirement set: {candidate}" - ) - - req = requirement_set.requirements[candidate.name] - assert req.name is not None - assert req.link is not None - assert req.name not in resolution_result.candidates - - # Scan the dependencies of the installation candidates, which cover both - # normal dependencies as well as Requires-Python information. - requires_python: Optional[SpecifierSet] = None - dependencies: List[Requirement] = [] - for maybe_dep in candidate.iter_dependencies(with_requires=True): - # It's unclear why `.iter_dependencies()` may occasionally yield `None`. - if maybe_dep is None: - continue - # There will only ever be one of these for each candidate, if any. We - # extract the version specifier. - if isinstance(maybe_dep, RequiresPythonRequirement): - requires_python = maybe_dep.specifier - continue - - # Convert the 2020 resolver-internal Requirement subclass instance into - # a `packaging.requirements.Requirement` instance. - maybe_req = maybe_dep.as_serializable_requirement() - if maybe_req is None: - continue - - # For `ExplicitRequirement`s only, we want to make sure we propagate any - # source URL into a dependency's `packaging.requirements.Requirement` - # instance. 
- if isinstance(maybe_dep, ExplicitRequirement): - dep_candidate = maybe_dep.candidate - if maybe_req.url is None and isinstance( - dep_candidate, LinkCandidate - ): - assert dep_candidate.source_link is not None - maybe_req = Requirement( - f"{maybe_req}@{dep_candidate.source_link.url}" - ) - - dependencies.append(maybe_req) - - # Mutate the candidates dictionary to add this candidate after processing - # any dependencies and python version requirement. - resolution_result.candidates[req.name] = ResolvedCandidate( - req=candidate.as_serializable_requirement(), - download_info=URLDownloadInfo.from_link_with_source( - LinkWithSource( - req.link, - source_dir=req.source_dir, - link_is_in_wheel_cache=req.original_link_is_in_wheel_cache, - ) - ), - dependencies=tuple(dependencies), - requires_python=requires_python, - ) # Write a simplified representation of the resolution to stdout. write_output(resolution_result.as_basic_log(options.json_report_file)) + # Write the full report data to the JSON output file. 
with open(options.json_report_file, "w") as f: json.dump(resolution_result.as_json(), f, indent=4) diff --git a/src/pip/_internal/resolution/resolvelib/reporter.py b/src/pip/_internal/resolution/resolvelib/reporter.py index 6ced5329b81..8a0ceb6a816 100644 --- a/src/pip/_internal/resolution/resolvelib/reporter.py +++ b/src/pip/_internal/resolution/resolvelib/reporter.py @@ -1,9 +1,27 @@ from collections import defaultdict +from dataclasses import dataclass, field from logging import getLogger -from typing import Any, DefaultDict +from typing import Any, DefaultDict, Dict, Iterable, List, Optional, Tuple +from pip._vendor.packaging.requirements import Requirement as PkgRequirement +from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.resolvelib.reporters import BaseReporter +from pip._internal.models.link import LinkWithSource, URLDownloadInfo +from pip._internal.req.req_install import ( + InstallRequirement, + produce_exact_version_specifier, +) +from pip._internal.resolution.base import RequirementSetWithCandidates +from pip._internal.resolution.resolvelib.candidates import ( + LinkCandidate, + RequiresPythonCandidate, +) +from pip._internal.resolution.resolvelib.requirements import ( + ExplicitRequirement, + RequiresPythonRequirement, +) + from .base import Candidate, Requirement logger = getLogger(__name__) @@ -66,3 +84,186 @@ def backtracking(self, candidate: Candidate) -> None: def pinning(self, candidate: Candidate) -> None: logger.info("Reporter.pinning(%r)", candidate) + + +@dataclass(frozen=True) +class ResolvedCandidate: + """Coalesce all the information pip's resolver retains about an + installation candidate.""" + + req: PkgRequirement + download_info: URLDownloadInfo + dependencies: Tuple[PkgRequirement, ...] 
+ requires_python: Optional[SpecifierSet] + + def as_json(self) -> Dict[str, Any]: + """Return a JSON-serializable representation of this install candidate.""" + return { + "requirement": str(self.req), + "download_info": self.download_info.as_json(), + "dependencies": {dep.name: str(dep) for dep in self.dependencies}, + "requires_python": str(self.requires_python) + if self.requires_python + else None, + } + + +@dataclass +class ResolutionResult: + """The inputs and outputs of a pip internal resolve process.""" + + input_requirements: Tuple[str, ...] + python_version: Optional[SpecifierSet] = None + candidates: Dict[str, ResolvedCandidate] = field(default_factory=dict) + + def as_basic_log(self, output_json_path: str) -> str: + """Generate a summary of the detailed JSON report produced with --report.""" + inputs = " ".join(f"'{req}'" for req in self.input_requirements) + resolved = " ".join(f"'{info.req}'" for info in self.candidates.values()) + return "\n".join( + [ + f"Python version: '{self.python_version}'", + f"Input requirements: {inputs}", + f"Resolution: {resolved}", + f"JSON report written to '{output_json_path}'.", + ] + ) + + def as_json(self) -> Dict[str, Any]: + """Return a JSON-serializable representation of the resolve process.""" + return { + "experimental": True, + "input_requirements": [str(req) for req in self.input_requirements], + "python_version": str(self.python_version), + "candidates": { + name: info.as_json() for name, info in self.candidates.items() + }, + } + + @classmethod + def _extract_hashable_resolve_input( + cls, + reqs: Iterable[InstallRequirement], + ) -> Tuple[str, ...]: + """Reconstruct the input requirements provided to the resolve. + + In theory, pip should be able to be re-run with these arguments to get the same + resolve output. Because pip can accept URLs as well as parseable requirement + strings on the command line, this method returns a tuple of strings instead of + `PkgRequirement` instances. 
+ + These strings are sorted so that they can be hashed and compared efficiently. + """ + input_requirements: List[str] = [] + for ireq in reqs: + if ireq.req: + # If the initial requirement string contained a url (retained in + # InstallRequirement.link), add it back to the requirement string + # included in the JSON report. + if ireq.link: + req_string = f"{ireq.req}@{ireq.link.url}" + else: + req_string = str(ireq.req) + else: + # If the InstallRequirement has no Requirement information, don't + # produce a Requirement string, but simply reproduce the URL. + assert ireq.link + req_string = ireq.link.url + + input_requirements.append(req_string) + + return tuple(sorted(input_requirements)) + + @classmethod + def generate_resolve_report( + cls, + input_requirements: Iterable[InstallRequirement], + resolved_requirement_set: RequirementSetWithCandidates, + ) -> "ResolutionResult": + """Process the resolve to obtain a JSON-serializable/pretty-printable report.""" + hashable_input = cls._extract_hashable_resolve_input(input_requirements) + resolution_result = cls(input_requirements=hashable_input) + + # (1) Scan all the install candidates from `.candidates`. + for candidate in resolved_requirement_set.candidates.mapping.values(): + + # (2) Map each install candidate back to a specific install requirement from + # `.requirements`. + req = resolved_requirement_set.requirements.get(candidate.name, None) + if req is None: + # Pip will impose an implicit `Requires-Python` constraint upon the + # whole resolve corresponding to the value of the `--python-version` + # argument. This shows up as an installation candidate which does not + # correspond to any requirement from the requirement set. + if isinstance(candidate, RequiresPythonCandidate): + # This candidate should only appear once. + assert resolution_result.python_version is None + # Generate a serializable `SpecifierSet` instance. 
+ resolution_result.python_version = produce_exact_version_specifier( + str(candidate.version) + ) + continue + + # All other types of installation candidates are expected to be found + # within the resolved requirement set. + raise TypeError( + f"unknown candidate not found in requirement set: {candidate}" + ) + assert req.name is not None + assert req.link is not None + # Each project name should only be fulfilled by a single + # installation candidate. + assert req.name not in resolution_result.candidates + + # (3) Scan the dependencies of the installation candidates, which cover both + # normal dependencies as well as Requires-Python information. + requires_python: Optional[SpecifierSet] = None + dependencies: List[PkgRequirement] = [] + for maybe_dep in candidate.iter_dependencies(with_requires=True): + # It's unclear why `.iter_dependencies()` may occasionally yield `None`. + if maybe_dep is None: + continue + + # There will only ever be one python version constraint for each + # candidate, if any. We extract the version specifier. + if isinstance(maybe_dep, RequiresPythonRequirement): + requires_python = maybe_dep.specifier + continue + + # Convert the 2020 resolver-internal Requirement subclass instance into + # a `packaging.requirements.Requirement` instance. + maybe_req = maybe_dep.as_serializable_requirement() + if maybe_req is None: + continue + + # For `ExplicitRequirement`s only, we want to make sure we propagate any + # source URL into a dependency's `packaging.requirements.Requirement` + # instance. 
+ if isinstance(maybe_dep, ExplicitRequirement): + dep_candidate = maybe_dep.candidate + if maybe_req.url is None and isinstance( + dep_candidate, LinkCandidate + ): + assert dep_candidate.source_link is not None + maybe_req = PkgRequirement( + f"{maybe_req}@{dep_candidate.source_link.url}" + ) + + dependencies.append(maybe_req) + + # Mutate the candidates dictionary to add this candidate after processing + # any dependencies and python version requirement. + resolution_result.candidates[req.name] = ResolvedCandidate( + req=candidate.as_serializable_requirement(), + download_info=URLDownloadInfo.from_link_with_source( + LinkWithSource( + req.link, + source_dir=req.source_dir, + link_is_in_wheel_cache=req.original_link_is_in_wheel_cache, + ) + ), + dependencies=tuple(dependencies), + requires_python=requires_python, + ) + + return resolution_result diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py index 60004c2b7ee..fbbc2109186 100644 --- a/src/pip/_internal/resolution/resolvelib/resolver.py +++ b/src/pip/_internal/resolution/resolvelib/resolver.py @@ -69,6 +69,13 @@ def __init__( py_version_info=py_version_info, ) self.ignore_dependencies = ignore_dependencies + # TODO: for performance, try to decouple extracting sdist metadata from + # actually building the sdist. See https://github.com/pypa/pip/issues/8929. + # As mentioned in that issue, PEP 658 support on PyPI would address many cases, + # but it would drastically improve performance for many existing packages if we + # attempted to extract PKG-INFO or .egg-info from non-wheel files, falling back + # to the slower setup.py invocation if not found. LazyZipOverHTTP and + # MemoryWheel already implement such a hack for wheel files specifically. self.dry_run = dry_run self.upgrade_strategy = upgrade_strategy self._result: Optional[Result] = None