Skip to content

Commit

Permalink
Add PEP 658 support.
Browse files Browse the repository at this point in the history
move url cleaning to link.py

use a nice dataclass to decouple hash parsing from Link

avoid downloading wheels when testing the resolver in isolation

avoid special-casing the python version requirement in download.py

streamline the RequirementSetWithCandidates invocation

restore _clean_link method from collector.py to pass tests
  • Loading branch information
cosmicexplorer committed Dec 28, 2021
1 parent d4ccc39 commit 8c755e8
Show file tree
Hide file tree
Showing 12 changed files with 400 additions and 174 deletions.
123 changes: 112 additions & 11 deletions src/pip/_internal/commands/download.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,99 @@
import json
import logging
import os
from dataclasses import dataclass, field
from optparse import Values
from typing import Dict, List
from typing import Any, Dict, List, Optional, Union

from pip._vendor.packaging.requirements import Requirement

from pip._internal.cli import cmdoptions
from pip._internal.cli.cmdoptions import make_target_python
from pip._internal.cli.req_command import RequirementCommand, with_cleanup
from pip._internal.cli.status_codes import SUCCESS
from pip._internal.models.link import Link, LinkHash
from pip._internal.req.req_tracker import get_requirement_tracker
from pip._internal.resolution.base import RequirementSetWithCandidates
from pip._internal.utils.misc import ensure_dir, normalize_path, write_output
from pip._internal.utils.temp_dir import TempDirectory

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class DistInfoMetadata:
    """Information about a PEP 658 metadata file attached to a file link.

    PEP 658 lets a simple-repository index serve a distribution's metadata
    file next to the distribution itself, advertised via the
    ``data-dist-info-metadata`` anchor attribute. The attribute value is
    either the literal string "true" (the file exists, but no hash was
    given) or a "<hash-name>=<hash-value>" string.
    """

    # URL of the metadata file: the link's URL with ".metadata" appended.
    metadata_url: str
    # Hash advertised for the metadata file, or None if the index only
    # declared that the file exists (attribute value "true").
    metadata_hash: Optional[LinkHash]

    @classmethod
    def from_link(cls, link: Link) -> Optional["DistInfoMetadata"]:
        """Extract PEP 658 info from ``link``, or None if it carries none."""
        if link.dist_info_metadata is None:
            return None

        metadata_url = f"{link.url_without_fragment}.metadata"
        if link.dist_info_metadata == "true":
            # The index asserted the metadata file exists without hashing it.
            metadata_hash = None
        else:
            metadata_hash = LinkHash.split_hash_name_and_value(link.dist_info_metadata)

        return cls(metadata_url=metadata_url, metadata_hash=metadata_hash)

    def as_json(self) -> Dict[str, Union[str, Optional[Dict[str, str]]]]:
        """Return a JSON-serializable representation of this instance."""
        return {
            "metadata_url": self.metadata_url,
            "metadata_hash": (
                self.metadata_hash.as_json() if self.metadata_hash else None
            ),
        }


@dataclass(frozen=True)
class RequirementDownloadInfo:
    """Serializable record of where and how one resolved requirement was fetched."""

    # The requirement that was resolved.
    req: Requirement
    # Exact URL the distribution file comes from.
    url: str
    # Hash of the downloaded file, if the index advertised one.
    file_hash: Optional[LinkHash]
    # PEP 658 metadata-file info for the link, if any.
    dist_info_metadata: Optional[DistInfoMetadata]

    @classmethod
    def from_req_and_link(
        cls,
        req: Requirement,
        link: Link,
    ) -> "RequirementDownloadInfo":
        """Combine a requirement with the link it was resolved to."""
        return cls(
            req=req,
            url=link.url,
            file_hash=link.get_link_hash(),
            dist_info_metadata=DistInfoMetadata.from_link(link),
        )

    def as_json(self) -> Dict[str, Any]:
        """Return a JSON-serializable representation of this instance."""
        file_hash_json = self.file_hash.as_json() if self.file_hash else None
        metadata_json = (
            self.dist_info_metadata.as_json() if self.dist_info_metadata else None
        )
        return {
            "req": str(self.req),
            "url": self.url,
            "hash": file_hash_json,
            "dist_info_metadata": metadata_json,
        }


@dataclass
class DownloadInfos:
    """Everything recorded for the JSON download report."""

    # Requirements pulled in by the resolver that do not appear in the
    # requirement set by name (e.g. the python version requirement).
    implicit_requirements: List[Requirement] = field(default_factory=list)
    # Per-requirement download info, keyed by requirement name.
    resolution: Dict[str, RequirementDownloadInfo] = field(default_factory=dict)

    def as_json(self) -> Dict[str, Any]:
        """Return a JSON-serializable representation of this instance."""
        serialized_resolution = {}
        for name, info in self.resolution.items():
            serialized_resolution[name] = info.as_json()
        return {
            "implicit_requirements": [str(req) for req in self.implicit_requirements],
            "resolution": serialized_resolution,
        }


class DownloadCommand(RequirementCommand):
"""
Download packages from:
Expand Down Expand Up @@ -149,24 +228,46 @@ def run(self, options: Values, args: List[str]) -> int:
requirement_set = resolver.resolve(reqs, check_supported_wheels=True)

downloaded: List[str] = []
download_infos: List[Dict[str, str]] = []
for req in requirement_set.requirements.values():
# If this distribution was not already satisfied, that means we
# downloaded it.
if req.satisfied_by is None:
assert req.name is not None
assert req.link is not None
download_infos.append(
{
"name": req.name,
"url": req.link.url,
}
)
preparer.save_linked_requirement(req)
assert req.name is not None
downloaded.append(req.name)

download_infos = DownloadInfos()
if options.print_download_urls:
if isinstance(requirement_set, RequirementSetWithCandidates):
for candidate in requirement_set.candidates.mapping.values():
# This will occur for the python version requirement, for example.
if candidate.name not in requirement_set.requirements:
download_infos.implicit_requirements.append(
candidate.as_serializable_requirement()
)
continue
req = requirement_set.requirements[candidate.name]
assert req.name is not None
assert req.link is not None
assert req.name not in download_infos.resolution
download_infos.resolution[
req.name
] = RequirementDownloadInfo.from_req_and_link(
req=candidate.as_serializable_requirement(),
link=req.link,
)
else:
logger.warning(
"--print-download-urls is being used with the legacy resolver. "
"The legacy resolver does not retain detailed dependency "
"information, so all the fields in the output JSON file "
"will be empty."
)

if downloaded:
write_output("Successfully downloaded %s", " ".join(downloaded))
if options.print_download_urls:
with open(options.print_download_urls, "w") as f:
json.dump(download_infos, f, indent=4)
json.dump(download_infos.as_json(), f, indent=4)

return SUCCESS
101 changes: 3 additions & 98 deletions src/pip/_internal/index/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@
import itertools
import logging
import os
import re
import urllib.parse
import urllib.request
import xml.etree.ElementTree
from optparse import Values
from typing import (
Callable,
Expand All @@ -29,19 +27,18 @@
from pip._vendor.requests.exceptions import RetryError, SSLError

from pip._internal.exceptions import NetworkConnectionError
from pip._internal.models.link import Link
from pip._internal.models.link import HTMLElement, Link
from pip._internal.models.search_scope import SearchScope
from pip._internal.network.session import PipSession
from pip._internal.network.utils import raise_for_status
from pip._internal.utils.filetypes import is_archive_file
from pip._internal.utils.misc import pairwise, redact_auth_from_url
from pip._internal.utils.misc import redact_auth_from_url
from pip._internal.vcs import vcs

from .sources import CandidatesFromPage, LinkSource, build_source

logger = logging.getLogger(__name__)

HTMLElement = xml.etree.ElementTree.Element
ResponseHeaders = MutableMapping[str, str]


Expand Down Expand Up @@ -171,94 +168,6 @@ def _determine_base_url(document: HTMLElement, page_url: str) -> str:
return page_url


def _clean_url_path_part(part: str) -> str:
"""
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
"""
# We unquote prior to quoting to make sure nothing is double quoted.
return urllib.parse.quote(urllib.parse.unquote(part))


def _clean_file_url_path(part: str) -> str:
"""
Clean the first part of a URL path that corresponds to a local
filesystem path (i.e. the first part after splitting on "@" characters).
"""
# We unquote prior to quoting to make sure nothing is double quoted.
# Also, on Windows the path part might contain a drive letter which
# should not be quoted. On Linux where drive letters do not
# exist, the colon should be quoted. We rely on urllib.request
# to do the right thing here.
return urllib.request.pathname2url(urllib.request.url2pathname(part))


# Reserved markers kept verbatim while cleaning: VCS revision separators
# ("@") and percent-encoded path separators ("%2F"), case-insensitively.
_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)


def _clean_url_path(path: str, is_local_path: bool) -> str:
    """Re-quote every non-reserved segment of a URL path.

    Splitting on the reserved characters before cleaning preserves
    revision strings in VCS URLs.
    """
    clean_func = _clean_file_url_path if is_local_path else _clean_url_path_part

    # re.split() with a capturing group alternates plain text with the
    # reserved separators it matched, starting and ending with plain text.
    pieces = _reserved_chars_re.split(path)

    cleaned = []
    for index, piece in enumerate(pieces):
        if index % 2:
            # A reserved separator: normalize %xx escapes (e.g. %2f -> %2F).
            cleaned.append(piece.upper())
        else:
            cleaned.append(clean_func(piece))

    return "".join(cleaned)


def _clean_link(url: str) -> str:
    """Return ``url`` with its path portion fully percent-quoted.

    For example, a ' ' in the URL becomes "%20", without double-quoting
    characters that are already escaped.
    """
    # Parse into `scheme://netloc/path;parameters?query#fragment` parts.
    parsed = urllib.parse.urlparse(url)
    # An empty netloc means the URL names a local filesystem path.
    cleaned_path = _clean_url_path(parsed.path, is_local_path=not parsed.netloc)
    return urllib.parse.urlunparse(parsed._replace(path=cleaned_path))


def _create_link_from_element(
    anchor: HTMLElement,
    page_url: str,
    base_url: str,
) -> Optional[Link]:
    """Build a Link from an <a> element on a simple-repository page.

    Returns None when the anchor has no href and so cannot point at a
    distribution file.
    """
    href = anchor.get("href")
    if not href:
        return None

    resolved_url = _clean_link(urllib.parse.urljoin(base_url, href))
    return Link(
        resolved_url,
        comes_from=page_url,
        requires_python=anchor.get("data-requires-python"),
        yanked_reason=anchor.get("data-yanked"),
    )


class CacheablePageContent:
def __init__(self, page: "HTMLPage") -> None:
assert page.cache_link_parsing
Expand Down Expand Up @@ -307,11 +216,7 @@ def parse_links(page: "HTMLPage") -> Iterable[Link]:
url = page.url
base_url = _determine_base_url(document, url)
for anchor in document.findall(".//a"):
link = _create_link_from_element(
anchor,
page_url=url,
base_url=base_url,
)
link = Link.from_element(anchor, page_url=url, base_url=base_url)
if link is None:
continue
yield link
Expand Down
3 changes: 3 additions & 0 deletions src/pip/_internal/metadata/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ def __repr__(self) -> str:
def __str__(self) -> str:
    """Render the distribution as "<raw name> <version>"."""
    return f"{self.raw_name} {self.version}"

def as_serializable_requirement(self) -> Requirement:
    """Return this distribution as a packaging ``Requirement``.

    Abstract here; concrete metadata backends override it (the
    pkg_resources backend delegates to its wrapped distribution).
    """
    raise NotImplementedError()

@property
def location(self) -> Optional[str]:
"""Where the distribution is loaded from.
Expand Down
3 changes: 3 additions & 0 deletions src/pip/_internal/metadata/pkg_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ def from_wheel(cls, wheel: Wheel, name: str) -> "Distribution":
)
return cls(dist)

def as_serializable_requirement(self) -> Requirement:
    """Return this distribution as a packaging ``Requirement``.

    Delegates to the wrapped distribution's ``as_requirement()``.
    """
    return self._dist.as_requirement()

@property
def location(self) -> Optional[str]:
    # Location as reported by the wrapped distribution object.
    return self._dist.location
Expand Down
Loading

0 comments on commit 8c755e8

Please sign in to comment.