diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py index 337cdee772c..c2945e0ef05 100644 --- a/src/pip/_internal/commands/download.py +++ b/src/pip/_internal/commands/download.py @@ -1,20 +1,99 @@ import json import logging import os +from dataclasses import dataclass, field from optparse import Values -from typing import Dict, List +from typing import Any, Dict, List, Optional, Union + +from pip._vendor.packaging.requirements import Requirement from pip._internal.cli import cmdoptions from pip._internal.cli.cmdoptions import make_target_python from pip._internal.cli.req_command import RequirementCommand, with_cleanup from pip._internal.cli.status_codes import SUCCESS +from pip._internal.models.link import Link, LinkHash from pip._internal.req.req_tracker import get_requirement_tracker +from pip._internal.resolution.base import RequirementSetWithCandidates from pip._internal.utils.misc import ensure_dir, normalize_path, write_output from pip._internal.utils.temp_dir import TempDirectory logger = logging.getLogger(__name__) +@dataclass(frozen=True) +class DistInfoMetadata: + """???/From PEP 658""" + + metadata_url: str + metadata_hash: Optional[LinkHash] + + @classmethod + def from_link(cls, link: Link) -> Optional["DistInfoMetadata"]: + if link.dist_info_metadata is None: + return None + + metadata_url = f"{link.url_without_fragment}.metadata" + if link.dist_info_metadata == "true": + metadata_hash = None + else: + metadata_hash = LinkHash.split_hash_name_and_value(link.dist_info_metadata) + + return cls(metadata_url=metadata_url, metadata_hash=metadata_hash) + + def as_json(self) -> Dict[str, Union[str, Optional[Dict[str, str]]]]: + return { + "metadata_url": self.metadata_url, + "metadata_hash": ( + self.metadata_hash.as_json() if self.metadata_hash else None + ), + } + + +@dataclass(frozen=True) +class RequirementDownloadInfo: + req: Requirement + url: str + file_hash: Optional[LinkHash] + dist_info_metadata: Optional[DistInfoMetadata] + + @classmethod + def from_req_and_link( + cls, + req: Requirement, + link: Link, + ) -> "RequirementDownloadInfo": + return cls( + req=req, + url=link.url, + file_hash=link.get_link_hash(), + dist_info_metadata=DistInfoMetadata.from_link(link), + ) + + def as_json(self) -> Dict[str, Any]: + return { + "req": str(self.req), + "url": self.url, + "hash": self.file_hash and self.file_hash.as_json(), + "dist_info_metadata": ( + self.dist_info_metadata and self.dist_info_metadata.as_json() + ), + } + + +@dataclass +class DownloadInfos: + implicit_requirements: List[Requirement] = field(default_factory=list) + resolution: Dict[str, RequirementDownloadInfo] = field(default_factory=dict) + + def as_json(self) -> Dict[str, Any]: + return { + "implicit_requirements": [str(req) for req in self.implicit_requirements], + "resolution": { + name: info.as_json() for name, info in self.resolution.items() + }, + } + + class DownloadCommand(RequirementCommand): """ Download packages from: @@ -149,24 +228,46 @@ def run(self, options: Values, args: List[str]) -> int: requirement_set = resolver.resolve(reqs, check_supported_wheels=True) downloaded: List[str] = [] - download_infos: List[Dict[str, str]] = [] for req in requirement_set.requirements.values(): + # If this distribution was not already satisfied, that means we + # downloaded it. 
if req.satisfied_by is None: - assert req.name is not None - assert req.link is not None - download_infos.append( - { - "name": req.name, - "url": req.link.url, - } - ) preparer.save_linked_requirement(req) + assert req.name is not None downloaded.append(req.name) + download_infos = DownloadInfos() + if options.print_download_urls: + if isinstance(requirement_set, RequirementSetWithCandidates): + for candidate in requirement_set.candidates.mapping.values(): + # This will occur for the python version requirement, for example. + if candidate.name not in requirement_set.requirements: + download_infos.implicit_requirements.append( + candidate.as_serializable_requirement() + ) + continue + req = requirement_set.requirements[candidate.name] + assert req.name is not None + assert req.link is not None + assert req.name not in download_infos.resolution + download_infos.resolution[ + req.name + ] = RequirementDownloadInfo.from_req_and_link( + req=candidate.as_serializable_requirement(), + link=req.link, + ) + else: + logger.warning( + "--print-download-urls is being used with the legacy resolver. " + "The legacy resolver does not retain detailed dependency " + "information, so all the fields in the output JSON file " + "will be empty." + ) + if downloaded: write_output("Successfully downloaded %s", " ".join(downloaded)) if options.print_download_urls: with open(options.print_download_urls, "w") as f: - json.dump(download_infos, f, indent=4) + json.dump(download_infos.as_json(), f, indent=4) return SUCCESS diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index d9412234eed..2dbe65efad9 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -8,10 +8,8 @@ import itertools import logging import os -import re import urllib.parse import urllib.request -import xml.etree.ElementTree from optparse import Values from typing import ( Callable, @@ -29,19 +27,18 @@ from pip._vendor.requests.exceptions import RetryError, SSLError from pip._internal.exceptions import NetworkConnectionError -from pip._internal.models.link import Link +from pip._internal.models.link import HTMLElement, Link from pip._internal.models.search_scope import SearchScope from pip._internal.network.session import PipSession from pip._internal.network.utils import raise_for_status from pip._internal.utils.filetypes import is_archive_file -from pip._internal.utils.misc import pairwise, redact_auth_from_url +from pip._internal.utils.misc import redact_auth_from_url from pip._internal.vcs import vcs from .sources import CandidatesFromPage, LinkSource, build_source logger = logging.getLogger(__name__) -HTMLElement = xml.etree.ElementTree.Element ResponseHeaders = MutableMapping[str, str] @@ -171,94 +168,6 @@ def _determine_base_url(document: HTMLElement, page_url: str) -> str: return page_url -def _clean_url_path_part(part: str) -> str: - """ - Clean a "part" of a URL path (i.e. after splitting on "@" characters). - """ - # We unquote prior to quoting to make sure nothing is double quoted. - return urllib.parse.quote(urllib.parse.unquote(part)) - - -def _clean_file_url_path(part: str) -> str: - """ - Clean the first part of a URL path that corresponds to a local - filesystem path (i.e. the first part after splitting on "@" characters). - """ - # We unquote prior to quoting to make sure nothing is double quoted. - # Also, on Windows the path part might contain a drive letter which - # should not be quoted. 
On Linux where drive letters do not - # exist, the colon should be quoted. We rely on urllib.request - # to do the right thing here. - return urllib.request.pathname2url(urllib.request.url2pathname(part)) - - -# percent-encoded: / -_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE) - - -def _clean_url_path(path: str, is_local_path: bool) -> str: - """ - Clean the path portion of a URL. - """ - if is_local_path: - clean_func = _clean_file_url_path - else: - clean_func = _clean_url_path_part - - # Split on the reserved characters prior to cleaning so that - # revision strings in VCS URLs are properly preserved. - parts = _reserved_chars_re.split(path) - - cleaned_parts = [] - for to_clean, reserved in pairwise(itertools.chain(parts, [""])): - cleaned_parts.append(clean_func(to_clean)) - # Normalize %xx escapes (e.g. %2f -> %2F) - cleaned_parts.append(reserved.upper()) - - return "".join(cleaned_parts) - - -def _clean_link(url: str) -> str: - """ - Make sure a link is fully quoted. - For example, if ' ' occurs in the URL, it will be replaced with "%20", - and without double-quoting other characters. - """ - # Split the URL into parts according to the general structure - # `scheme://netloc/path;parameters?query#fragment`. - result = urllib.parse.urlparse(url) - # If the netloc is empty, then the URL refers to a local filesystem path. - is_local_path = not result.netloc - path = _clean_url_path(result.path, is_local_path=is_local_path) - return urllib.parse.urlunparse(result._replace(path=path)) - - -def _create_link_from_element( - anchor: HTMLElement, - page_url: str, - base_url: str, -) -> Optional[Link]: - """ - Convert an anchor element in a simple repository page to a Link. - """ - href = anchor.get("href") - if not href: - return None - - url = _clean_link(urllib.parse.urljoin(base_url, href)) - pyrequire = anchor.get("data-requires-python") - yanked_reason = anchor.get("data-yanked") - - link = Link( - url, - comes_from=page_url, - requires_python=pyrequire, - yanked_reason=yanked_reason, - ) - - return link - - class CacheablePageContent: def __init__(self, page: "HTMLPage") -> None: assert page.cache_link_parsing @@ -307,11 +216,7 @@ def parse_links(page: "HTMLPage") -> Iterable[Link]: url = page.url base_url = _determine_base_url(document, url) for anchor in document.findall(".//a"): - link = _create_link_from_element( - anchor, - page_url=url, - base_url=base_url, - ) + link = Link.from_element(anchor, page_url=url, base_url=base_url) if link is None: continue yield link diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index 1a5a781cb3e..9ab85e28d24 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -101,6 +101,9 @@ def __repr__(self) -> str: def __str__(self) -> str: return f"{self.raw_name} {self.version}" + def as_serializable_requirement(self) -> Requirement: + raise NotImplementedError() + @property def location(self) -> Optional[str]: """Where the distribution is loaded from. 
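
For context, a minimal sketch of the JSON document that `pip download --print-download-urls <path>` would write once the changes above are applied. The requirement names, URLs, and digests are made-up placeholders, and the "Requires-Python" entry corresponds to the serialized interpreter requirement introduced further down in candidates.py.

    import json

    from pip._vendor.packaging.requirements import Requirement

    from pip._internal.commands.download import (
        DistInfoMetadata,
        DownloadInfos,
        RequirementDownloadInfo,
    )
    from pip._internal.models.link import LinkHash

    # Placeholder data only -- not real PyPI files or digests.
    infos = DownloadInfos(
        implicit_requirements=[Requirement("Requires-Python==3.9.7")],
        resolution={
            "requests": RequirementDownloadInfo(
                req=Requirement("requests==2.26.0"),
                url="https://example.com/requests-2.26.0-py2.py3-none-any.whl",
                file_hash=LinkHash(name="sha256", value="ab" * 32),
                dist_info_metadata=DistInfoMetadata(
                    metadata_url=(
                        "https://example.com/"
                        "requests-2.26.0-py2.py3-none-any.whl.metadata"
                    ),
                    metadata_hash=None,
                ),
            ),
        },
    )

    # Roughly produces:
    # {
    #   "implicit_requirements": ["Requires-Python==3.9.7"],
    #   "resolution": {
    #     "requests": {
    #       "req": "requests==2.26.0",
    #       "url": "https://example.com/requests-2.26.0-py2.py3-none-any.whl",
    #       "hash": {"name": "sha256", "value": "abab..."},
    #       "dist_info_metadata": {"metadata_url": "...whl.metadata",
    #                              "metadata_hash": null}
    #     }
    #   }
    # }
    print(json.dumps(infos.as_json(), indent=4))
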
diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py index d39f0ba31da..479f61be574 100644 --- a/src/pip/_internal/metadata/pkg_resources.py +++ b/src/pip/_internal/metadata/pkg_resources.py @@ -120,6 +120,9 @@ def from_wheel(cls, wheel: Wheel, name: str) -> "Distribution": ) return cls(dist) + def as_serializable_requirement(self) -> Requirement: + return self._dist.as_requirement() + @property def location(self) -> Optional[str]: return self._dist.location diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 6069b278b9b..3260a070eaa 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -1,14 +1,18 @@ import functools +import itertools import logging import os import posixpath import re import urllib.parse +import xml.etree.ElementTree +from dataclasses import dataclass from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union from pip._internal.utils.filetypes import WHEEL_EXTENSION from pip._internal.utils.hashes import Hashes from pip._internal.utils.misc import ( + pairwise, redact_auth_from_url, split_auth_from_netloc, splitext, @@ -22,9 +26,110 @@ logger = logging.getLogger(__name__) +HTMLElement = xml.etree.ElementTree.Element + + _SUPPORTED_HASHES = ("sha1", "sha224", "sha384", "sha256", "sha512", "md5") +# FIXME: test this!! +@dataclass(frozen=True) +class LinkHash: + name: str + value: str + + # TODO: consider beginning/ending this with \b? + # TODO: consider re.IGNORECASE? + _hash_re = re.compile( + r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES)) + ) + + @classmethod + @functools.lru_cache(maxsize=None) + def split_hash_name_and_value(cls, url: str) -> Optional["LinkHash"]: + match = cls._hash_re.search(url) + if match is None: + return None + name, value = match.groups() + return cls(name=name, value=value) + + def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool: + """ + Return True if the current hash is allowed by `hashes`. + """ + if hashes is None: + return False + return hashes.is_hash_allowed(self.name, hex_digest=self.value) + + def as_json(self) -> Dict[str, str]: + return { + "name": self.name, + "value": self.value, + } + + +def _clean_url_path_part(part: str) -> str: + """ + Clean a "part" of a URL path (i.e. after splitting on "@" characters). + """ + # We unquote prior to quoting to make sure nothing is double quoted. + return urllib.parse.quote(urllib.parse.unquote(part)) + + +def _clean_file_url_path(part: str) -> str: + """ + Clean the first part of a URL path that corresponds to a local + filesystem path (i.e. the first part after splitting on "@" characters). + """ + # We unquote prior to quoting to make sure nothing is double quoted. + # Also, on Windows the path part might contain a drive letter which + # should not be quoted. On Linux where drive letters do not + # exist, the colon should be quoted. We rely on urllib.request + # to do the right thing here. + return urllib.request.pathname2url(urllib.request.url2pathname(part)) + + +# percent-encoded: / +_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE) + + +def _clean_url_path(path: str, is_local_path: bool) -> str: + """ + Clean the path portion of a URL. + """ + if is_local_path: + clean_func = _clean_file_url_path + else: + clean_func = _clean_url_path_part + + # Split on the reserved characters prior to cleaning so that + # revision strings in VCS URLs are properly preserved. 
+ parts = _reserved_chars_re.split(path) + + cleaned_parts = [] + for to_clean, reserved in pairwise(itertools.chain(parts, [""])): + cleaned_parts.append(clean_func(to_clean)) + # Normalize %xx escapes (e.g. %2f -> %2F) + cleaned_parts.append(reserved.upper()) + + return "".join(cleaned_parts) + + +def _ensure_quoted_url(url: str) -> str: + """ + Make sure a link is fully quoted. + For example, if ' ' occurs in the URL, it will be replaced with "%20", + and without double-quoting other characters. + """ + # Split the URL into parts according to the general structure + # `scheme://netloc/path;parameters?query#fragment`. + result = urllib.parse.urlparse(url) + # If the netloc is empty, then the URL refers to a local filesystem path. + is_local_path = not result.netloc + path = _clean_url_path(result.path, is_local_path=is_local_path) + return urllib.parse.urlunparse(result._replace(path=path)) + + class Link(KeyBasedCompareMixin): """Represents a parsed link from a Package Index's simple URL""" @@ -34,6 +139,7 @@ class Link(KeyBasedCompareMixin): "comes_from", "requires_python", "yanked_reason", + "dist_info_metadata", "cache_link_parsing", ] @@ -43,6 +149,7 @@ def __init__( comes_from: Optional[Union[str, "HTMLPage"]] = None, requires_python: Optional[str] = None, yanked_reason: Optional[str] = None, + dist_info_metadata: Optional[str] = None, cache_link_parsing: bool = True, ) -> None: """ @@ -59,6 +166,7 @@ def __init__( a simple repository HTML link. If the file has been yanked but no reason was provided, this should be the empty string. See PEP 592 for more information and the specification. + :param dist_info_metadata: ???/PEP 658 :param cache_link_parsing: A flag that is used elsewhere to determine whether resources retrieved from this link should be cached. PyPI index urls should @@ -78,11 +186,41 @@ def __init__( self.comes_from = comes_from self.requires_python = requires_python if requires_python else None self.yanked_reason = yanked_reason + self.dist_info_metadata = dist_info_metadata super().__init__(key=url, defining_class=Link) self.cache_link_parsing = cache_link_parsing + @classmethod + def from_element( + cls, + anchor: HTMLElement, + page_url: str, + base_url: str, + ) -> Optional["Link"]: + """ + Convert an anchor element in a simple repository page to a Link. 
+ """ + href = anchor.get("href") + if not href: + return None + + url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href)) + pyrequire = anchor.get("data-requires-python") + yanked_reason = anchor.get("data-yanked") + dist_info_metadata = anchor.get("data-dist-info-metadata") + + link = Link( + url, + comes_from=page_url, + requires_python=pyrequire, + yanked_reason=yanked_reason, + dist_info_metadata=dist_info_metadata, + ) + + return link + def __str__(self) -> str: if self.requires_python: rp = f" (requires-python:{self.requires_python})" @@ -165,22 +303,21 @@ def subdirectory_fragment(self) -> Optional[str]: return None return match.group(1) - _hash_re = re.compile( - r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES)) - ) + def get_link_hash(self) -> Optional[LinkHash]: + return LinkHash.split_hash_name_and_value(self._url) @property def hash(self) -> Optional[str]: - match = self._hash_re.search(self._url) - if match: - return match.group(2) + link_hash = self.get_link_hash() + if link_hash is not None: + return link_hash.value return None @property def hash_name(self) -> Optional[str]: - match = self._hash_re.search(self._url) - if match: - return match.group(1) + link_hash = self.get_link_hash() + if link_hash is not None: + return link_hash.name return None @property @@ -210,19 +347,16 @@ def is_yanked(self) -> bool: @property def has_hash(self) -> bool: - return self.hash_name is not None + return self.get_link_hash() is not None def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool: """ - Return True if the link has a hash and it is allowed. + Return True if the link has a hash and it is allowed by `hashes`. """ - if hashes is None or not self.has_hash: + link_hash = self.get_link_hash() + if link_hash is None: return False - # Assert non-None so mypy knows self.hash_name and self.hash are str. - assert self.hash_name is not None - assert self.hash is not None - - return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash) + return link_hash.is_hash_allowed(hashes) class _CleanResult(NamedTuple): diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index 6fa6eb2a2a9..8893d79b3aa 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -62,6 +62,23 @@ logger = logging.getLogger(__name__) +def produce_exact_version_requirement(name: str, version: str) -> Requirement: + if isinstance(parse_version(version), Version): + op = "==" + else: + op = "===" + + return Requirement( + "".join( + [ + name, + op, + version, + ] + ) + ) + + class InstallRequirement: """ Represents something that may be installed later on, may have information @@ -348,20 +365,10 @@ def _set_requirement(self) -> None: assert self.metadata is not None assert self.source_dir is not None - # Construct a Requirement object from the generated metadata - if isinstance(parse_version(self.metadata["Version"]), Version): - op = "==" - else: - op = "===" - - self.req = Requirement( - "".join( - [ - self.metadata["Name"], - op, - self.metadata["Version"], - ] - ) + # Construct a Requirement object from the generated metadata. 
+ self.req = produce_exact_version_requirement( + self.metadata["Name"], + self.metadata["Version"], ) def warn_on_mismatching_name(self) -> None: diff --git a/src/pip/_internal/resolution/base.py b/src/pip/_internal/resolution/base.py index 42dade18c1e..51ed1ee4fbc 100644 --- a/src/pip/_internal/resolution/base.py +++ b/src/pip/_internal/resolution/base.py @@ -1,20 +1,40 @@ -from typing import Callable, List, Optional +import abc +from typing import TYPE_CHECKING, Callable, List, Optional from pip._internal.req.req_install import InstallRequirement from pip._internal.req.req_set import RequirementSet +if TYPE_CHECKING: + from pip._vendor.resolvelib.resolvers import Result as RLResult + + from .resolvelib.base import Candidate, Requirement + + Result = RLResult[Requirement, Candidate, str] + InstallRequirementProvider = Callable[ [str, Optional[InstallRequirement]], InstallRequirement ] -class BaseResolver: +class RequirementSetWithCandidates(RequirementSet): + def __init__( + self, + candidates: "Result", + check_supported_wheels: bool = True, + ) -> None: + self.candidates = candidates + super().__init__(check_supported_wheels=check_supported_wheels) + + +class BaseResolver(metaclass=abc.ABCMeta): + @abc.abstractmethod def resolve( self, root_reqs: List[InstallRequirement], check_supported_wheels: bool ) -> RequirementSet: - raise NotImplementedError() + ... + @abc.abstractmethod def get_installation_order( self, req_set: RequirementSet ) -> List[InstallRequirement]: - raise NotImplementedError() + ... diff --git a/src/pip/_internal/resolution/resolvelib/base.py b/src/pip/_internal/resolution/resolvelib/base.py index b206692a0a9..0826bb70e95 100644 --- a/src/pip/_internal/resolution/resolvelib/base.py +++ b/src/pip/_internal/resolution/resolvelib/base.py @@ -1,5 +1,7 @@ +import abc from typing import FrozenSet, Iterable, Optional, Tuple, Union +from pip._vendor.packaging.requirements import Requirement as PkgRequirement from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.utils import NormalizedName, canonicalize_name from pip._vendor.packaging.version import LegacyVersion, Version @@ -59,8 +61,8 @@ def is_satisfied_by(self, candidate: "Candidate") -> bool: return self.specifier.contains(candidate.version, prereleases=True) -class Requirement: - @property +class Requirement(metaclass=abc.ABCMeta): + @abc.abstractproperty def project_name(self) -> NormalizedName: """The "project name" of a requirement. @@ -68,25 +70,25 @@ def project_name(self) -> NormalizedName: in which case ``name`` would contain the ``[...]`` part, while this refers to the name of the project. """ - raise NotImplementedError("Subclass should override") - @property + @abc.abstractproperty def name(self) -> str: """The name identifying this requirement in the resolver. This is different from ``project_name`` if this requirement contains extras, where ``project_name`` would not contain the ``[...]`` part. """ - raise NotImplementedError("Subclass should override") def is_satisfied_by(self, candidate: "Candidate") -> bool: return False + @abc.abstractmethod def get_candidate_lookup(self) -> CandidateLookup: - raise NotImplementedError("Subclass should override") + ... + @abc.abstractmethod def format_for_error(self) -> str: - raise NotImplementedError("Subclass should override") + ... 
def _match_link(link: Link, candidate: "Candidate") -> bool: @@ -95,8 +97,8 @@ def _match_link(link: Link, candidate: "Candidate") -> bool: return False -class Candidate: - @property +class Candidate(metaclass=abc.ABCMeta): + @abc.abstractproperty def project_name(self) -> NormalizedName: """The "project name" of the candidate. @@ -104,38 +106,43 @@ def project_name(self) -> NormalizedName: in which case ``name`` would contain the ``[...]`` part, while this refers to the name of the project. """ - raise NotImplementedError("Override in subclass") - @property + @abc.abstractproperty def name(self) -> str: """The name identifying this candidate in the resolver. This is different from ``project_name`` if this candidate contains extras, where ``project_name`` would not contain the ``[...]`` part. """ - raise NotImplementedError("Override in subclass") - @property + @abc.abstractproperty def version(self) -> CandidateVersion: - raise NotImplementedError("Override in subclass") + ... - @property + @abc.abstractmethod + def as_serializable_requirement(self) -> PkgRequirement: + ... + + @abc.abstractproperty def is_installed(self) -> bool: - raise NotImplementedError("Override in subclass") + ... - @property + @abc.abstractproperty def is_editable(self) -> bool: - raise NotImplementedError("Override in subclass") + ... - @property + @abc.abstractproperty def source_link(self) -> Optional[Link]: - raise NotImplementedError("Override in subclass") + ... + @abc.abstractmethod def iter_dependencies(self, with_requires: bool) -> Iterable[Optional[Requirement]]: - raise NotImplementedError("Override in subclass") + ... + @abc.abstractmethod def get_install_requirement(self) -> Optional[InstallRequirement]: - raise NotImplementedError("Override in subclass") + ... + @abc.abstractmethod def format_for_error(self) -> str: - raise NotImplementedError("Subclass should override") + ... diff --git a/src/pip/_internal/resolution/resolvelib/candidates.py b/src/pip/_internal/resolution/resolvelib/candidates.py index c049255fc96..01e89ab1ed1 100644 --- a/src/pip/_internal/resolution/resolvelib/candidates.py +++ b/src/pip/_internal/resolution/resolvelib/candidates.py @@ -2,6 +2,7 @@ import sys from typing import TYPE_CHECKING, Any, FrozenSet, Iterable, Optional, Tuple, Union, cast +from pip._vendor.packaging.requirements import Requirement as PkgRequirement from pip._vendor.packaging.utils import NormalizedName, canonicalize_name from pip._vendor.packaging.version import Version @@ -13,7 +14,10 @@ install_req_from_editable, install_req_from_line, ) -from pip._internal.req.req_install import InstallRequirement +from pip._internal.req.req_install import ( + InstallRequirement, + produce_exact_version_requirement, +) from pip._internal.utils.misc import normalize_version_info from .base import Candidate, CandidateVersion, Requirement, format_name @@ -31,6 +35,9 @@ # Avoid conflicting with the PyPI package "Python". REQUIRES_PYTHON_IDENTIFIER = cast(NormalizedName, "") +# Avoid clashing with any package on PyPI, but remain parseable as a Requirement. This +# should only be used for .as_serializable_requirement(). 
+REQUIRES_PYTHON_SERIALIZABLE_IDENTIFIER = cast(NormalizedName, "Requires-Python") def as_base_candidate(candidate: Candidate) -> Optional[BaseCandidate]: @@ -156,6 +163,9 @@ def __init__( def __str__(self) -> str: return f"{self.name} {self.version}" + def as_serializable_requirement(self) -> PkgRequirement: + return produce_exact_version_requirement(self.name, str(self.version)) + def __repr__(self) -> str: return "{class_name}({link!r})".format( class_name=self.__class__.__name__, @@ -364,6 +374,9 @@ def name(self) -> str: def version(self) -> CandidateVersion: return self.dist.version + def as_serializable_requirement(self) -> PkgRequirement: + return self.dist.as_serializable_requirement() + @property def is_editable(self) -> bool: return self.dist.editable @@ -446,6 +459,9 @@ def name(self) -> str: def version(self) -> CandidateVersion: return self.base.version + def as_serializable_requirement(self) -> PkgRequirement: + return self.base.as_serializable_requirement() + def format_for_error(self) -> str: return "{} [{}]".format( self.base.format_for_error(), ", ".join(sorted(self.extras)) @@ -528,6 +544,15 @@ def name(self) -> str: def version(self) -> CandidateVersion: return self._version + def as_serializable_requirement(self) -> PkgRequirement: + return produce_exact_version_requirement( + REQUIRES_PYTHON_SERIALIZABLE_IDENTIFIER, str(self.version) + ) + + @property + def is_editable(self) -> bool: + return False + def format_for_error(self) -> str: return f"Python {self.version}" diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py index 5d9d264b1af..650269dd59b 100644 --- a/src/pip/_internal/resolution/resolvelib/resolver.py +++ b/src/pip/_internal/resolution/resolvelib/resolver.py @@ -13,7 +13,11 @@ from pip._internal.operations.prepare import RequirementPreparer from pip._internal.req.req_install import InstallRequirement from pip._internal.req.req_set import RequirementSet -from pip._internal.resolution.base import BaseResolver, InstallRequirementProvider +from pip._internal.resolution.base import ( + BaseResolver, + InstallRequirementProvider, + RequirementSetWithCandidates, +) from pip._internal.resolution.resolvelib.provider import PipProvider from pip._internal.resolution.resolvelib.reporter import ( PipDebuggingReporter, @@ -71,7 +75,7 @@ def __init__( def resolve( self, root_reqs: List[InstallRequirement], check_supported_wheels: bool - ) -> RequirementSet: + ) -> RequirementSetWithCandidates: collected = self.factory.collect_root_requirements(root_reqs) provider = PipProvider( factory=self.factory, @@ -102,7 +106,9 @@ def resolve( ) raise error from e - req_set = RequirementSet(check_supported_wheels=check_supported_wheels) + req_set = RequirementSetWithCandidates( + candidates=result, check_supported_wheels=check_supported_wheels + ) for candidate in result.mapping.values(): ireq = candidate.get_install_requirement() if ireq is None: diff --git a/tests/unit/resolution_resolvelib/test_resolver.py b/tests/unit/resolution_resolvelib/test_resolver.py index 579195b55ea..62dabfe0cd7 100644 --- a/tests/unit/resolution_resolvelib/test_resolver.py +++ b/tests/unit/resolution_resolvelib/test_resolver.py @@ -28,6 +28,7 @@ def resolver(preparer: RequirementPreparer, finder: PackageFinder) -> Resolver: ignore_installed=False, ignore_requires_python=False, force_reinstall=False, + avoid_wheel_downloads=True, upgrade_strategy="to-satisfy-only", ) return resolver diff --git a/tests/unit/test_collector.py 
b/tests/unit/test_collector.py index 8b60c302915..577fa375697 100644 --- a/tests/unit/test_collector.py +++ b/tests/unit/test_collector.py @@ -2,6 +2,7 @@ import logging import os.path import re +import urllib.parse import urllib.request import uuid from textwrap import dedent @@ -15,8 +16,6 @@ from pip._internal.index.collector import ( HTMLPage, LinkCollector, - _clean_link, - _clean_url_path, _determine_base_url, _get_html_page, _get_html_response, @@ -28,12 +27,27 @@ from pip._internal.index.sources import _FlatDirectorySource, _IndexDirectorySource from pip._internal.models.candidate import InstallationCandidate from pip._internal.models.index import PyPI -from pip._internal.models.link import Link +from pip._internal.models.link import Link, _clean_url_path from pip._internal.network.session import PipSession from tests.lib import TestData, make_test_link_collector from tests.lib.path import Path +def _clean_link(url: str) -> str: + """ + Make sure a link is fully quoted. + For example, if ' ' occurs in the URL, it will be replaced with "%20", + and without double-quoting other characters. + """ + # Split the URL into parts according to the general structure + # `scheme://netloc/path;parameters?query#fragment`. + result = urllib.parse.urlparse(url) + # If the netloc is empty, then the URL refers to a local filesystem path. + is_local_path = not result.netloc + path = _clean_url_path(result.path, is_local_path=is_local_path) + return urllib.parse.urlunparse(result._replace(path=path)) + + @pytest.mark.parametrize( "url", [
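
A rough usage sketch of the new LinkHash helper and the reworked Link.is_hash_allowed(), assuming this patch is applied; the URL and digest below are fabricated.

    from pip._internal.models.link import Link, LinkHash
    from pip._internal.utils.hashes import Hashes

    # Fabricated wheel URL with an inline sha256 fragment (64 hex characters).
    digest = "ab" * 32
    url = f"https://example.com/simple/pip/pip-21.3.1-py3-none-any.whl#sha256={digest}"

    link_hash = LinkHash.split_hash_name_and_value(url)
    assert link_hash is not None
    assert link_hash.as_json() == {"name": "sha256", "value": digest}

    # Link.hash, Link.hash_name, and Link.is_hash_allowed() now all delegate to LinkHash.
    link = Link(url)
    assert link.get_link_hash() == link_hash
    assert link.is_hash_allowed(Hashes({"sha256": [digest]}))
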
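Along the same lines, a sketch of how a PEP 658 data-dist-info-metadata attribute scraped from a simple-index anchor flows into DistInfoMetadata, again with made-up values.

    from pip._internal.commands.download import DistInfoMetadata
    from pip._internal.models.link import Link

    # A link whose index page advertised data-dist-info-metadata="sha256=<digest>".
    link = Link(
        "https://example.com/simple/pip/pip-21.3.1-py3-none-any.whl",
        dist_info_metadata="sha256=" + "cd" * 32,
    )
    metadata = DistInfoMetadata.from_link(link)
    assert metadata is not None
    # Per PEP 658, the metadata file lives next to the wheel.
    assert metadata.metadata_url.endswith(".whl.metadata")
    assert metadata.metadata_hash is not None
    assert metadata.metadata_hash.name == "sha256"
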