diff --git a/news/11137.feature.rst b/news/11137.feature.rst new file mode 100644 index 00000000000..c5986f4fd89 --- /dev/null +++ b/news/11137.feature.rst @@ -0,0 +1,3 @@ +Record in wheel cache entries the URL of the original artifact that was downloaded +to build the cached wheels. The record is named ``origin.json`` and uses the PEP 610 +Direct URL format. diff --git a/src/pip/_internal/cache.py b/src/pip/_internal/cache.py index 1d6df220118..1edcc76722b 100644 --- a/src/pip/_internal/cache.py +++ b/src/pip/_internal/cache.py @@ -5,12 +5,14 @@ import json import logging import os +from pathlib import Path from typing import Any, Dict, List, Optional, Set from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version from pip._vendor.packaging.utils import canonicalize_name from pip._internal.exceptions import InvalidWheelFilename +from pip._internal.models.direct_url import DirectUrl from pip._internal.models.format_control import FormatControl from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel @@ -19,6 +21,8 @@ logger = logging.getLogger(__name__) +ORIGIN_JSON_NAME = "origin.json" + def _hash_dict(d: Dict[str, str]) -> str: """Return a stable sha224 of a dictionary.""" @@ -204,6 +208,10 @@ def __init__( ): self.link = link self.persistent = persistent + self.origin: Optional[DirectUrl] = None + origin_direct_url_path = Path(self.link.file_path).parent / ORIGIN_JSON_NAME + if origin_direct_url_path.exists(): + self.origin = DirectUrl.from_json(origin_direct_url_path.read_text()) class WheelCache(Cache): @@ -262,3 +270,20 @@ def get_cache_entry( return CacheEntry(retval, persistent=False) return None + + @staticmethod + def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None: + origin_path = Path(cache_dir) / ORIGIN_JSON_NAME + if origin_path.is_file(): + origin = DirectUrl.from_json(origin_path.read_text()) + # TODO: use DirectUrl.equivalent when https://github.com/pypa/pip/pull/10564
+ # is merged. + if origin.url != download_info.url: + logger.warning( + "Origin URL %s in cache entry %s does not match download URL %s. " + "This is likely a pip bug or a cache corruption issue.", + origin.url, + cache_dir, + download_info.url, + ) + origin_path.write_text(download_info.to_json()) diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index df1016ebbd1..80723fffe47 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -25,6 +25,7 @@ ) from pip._internal.index.package_finder import PackageFinder from pip._internal.metadata import BaseDistribution +from pip._internal.models.direct_url import ArchiveInfo from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel from pip._internal.network.download import BatchDownloader, Downloader @@ -35,9 +36,18 @@ from pip._internal.network.session import PipSession from pip._internal.operations.build.build_tracker import BuildTracker from pip._internal.req.req_install import InstallRequirement +from pip._internal.utils.direct_url_helpers import ( + direct_url_for_editable, + direct_url_from_link, +) from pip._internal.utils.hashes import Hashes, MissingHashes from pip._internal.utils.logging import indent_log -from pip._internal.utils.misc import display_path, hide_url, is_installable_dir +from pip._internal.utils.misc import ( + display_path, + hash_file, + hide_url, + is_installable_dir, +) from pip._internal.utils.temp_dir import TempDirectory from pip._internal.utils.unpacking import unpack_file from pip._internal.vcs import vcs @@ -489,6 +499,23 @@ def _prepare_linked_requirement( hashes.check_against_path(file_path) local_file = File(file_path, content_type=None) + # If download_info is set, we got it from the wheel cache. + if req.download_info is None: + # Editables don't go through this function (see + # prepare_editable_requirement). 
+ assert not req.editable + req.download_info = direct_url_from_link(link, req.source_dir) + # Make sure we have a hash in download_info. If we got it as part of the + # URL, it will have been verified and we can rely on it. Otherwise we + # compute it from the downloaded file. + if ( + isinstance(req.download_info.info, ArchiveInfo) + and not req.download_info.info.hash + and local_file + ): + hash = hash_file(local_file.path)[0].hexdigest() + req.download_info.info.hash = f"sha256={hash}" + # For use in later processing, # preserve the file path on the requirement. if local_file: @@ -547,6 +574,8 @@ def prepare_editable_requirement( ) req.ensure_has_source_dir(self.src_dir) req.update_editable() + assert req.source_dir + req.download_info = direct_url_for_editable(req.unpacked_source_directory) dist = _get_prepared_distribution( req, diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index b40d9e251f8..e01da2d69ef 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -26,6 +26,7 @@ get_default_environment, get_directory_distribution, ) +from pip._internal.models.direct_url import DirectUrl from pip._internal.models.link import Link from pip._internal.operations.build.metadata import generate_metadata from pip._internal.operations.build.metadata_editable import generate_editable_metadata @@ -112,6 +113,10 @@ def __init__( self.link = self.original_link = link self.original_link_is_in_wheel_cache = False + # Information about the location of the artifact that was downloaded. This + # property is guaranteed to be set in resolver results. + self.download_info: Optional[DirectUrl] = None + # Path to any downloaded or already-existing package.
self.local_file_path: Optional[str] = None if self.link and self.link.is_file: @@ -762,6 +767,7 @@ def install( if self.is_wheel: assert self.local_file_path direct_url = None + # TODO this can be refactored to direct_url = self.download_info if self.editable: direct_url = direct_url_for_editable(self.unpacked_source_directory) elif self.original_link: diff --git a/src/pip/_internal/resolution/legacy/resolver.py b/src/pip/_internal/resolution/legacy/resolver.py index 1225ae70fcb..fb49d41695f 100644 --- a/src/pip/_internal/resolution/legacy/resolver.py +++ b/src/pip/_internal/resolution/legacy/resolver.py @@ -45,6 +45,7 @@ from pip._internal.resolution.base import BaseResolver, InstallRequirementProvider from pip._internal.utils import compatibility_tags from pip._internal.utils.compatibility_tags import get_supported +from pip._internal.utils.direct_url_helpers import direct_url_from_link from pip._internal.utils.logging import indent_log from pip._internal.utils.misc import normalize_version_info from pip._internal.utils.packaging import check_requires_python @@ -431,6 +432,14 @@ def _populate_link(self, req: InstallRequirement) -> None: logger.debug("Using cached wheel link: %s", cache_entry.link) if req.link is req.original_link and cache_entry.persistent: req.original_link_is_in_wheel_cache = True + if cache_entry.origin is not None: + req.download_info = cache_entry.origin + else: + # Legacy cache entry that does not have origin.json. + # download_info may miss the archive_info.hash field. 
+ req.download_info = direct_url_from_link( + req.link, link_is_in_wheel_cache=cache_entry.persistent + ) req.link = cache_entry.link def _get_dist_for(self, req: InstallRequirement) -> BaseDistribution: diff --git a/src/pip/_internal/resolution/resolvelib/candidates.py b/src/pip/_internal/resolution/resolvelib/candidates.py index d1470ecbf4e..f5bc343b91b 100644 --- a/src/pip/_internal/resolution/resolvelib/candidates.py +++ b/src/pip/_internal/resolution/resolvelib/candidates.py @@ -18,6 +18,7 @@ install_req_from_line, ) from pip._internal.req.req_install import InstallRequirement +from pip._internal.utils.direct_url_helpers import direct_url_from_link from pip._internal.utils.misc import normalize_version_info from .base import Candidate, CandidateVersion, Requirement, format_name @@ -281,12 +282,17 @@ def __init__( version, wheel_version, name ) - if ( - cache_entry is not None - and cache_entry.persistent - and template.link is template.original_link - ): - ireq.original_link_is_in_wheel_cache = True + if cache_entry is not None: + if cache_entry.persistent and template.link is template.original_link: + ireq.original_link_is_in_wheel_cache = True + if cache_entry.origin is not None: + ireq.download_info = cache_entry.origin + else: + # Legacy cache entry that does not have origin.json. + # download_info may miss the archive_info.hash field. 
+ ireq.download_info = direct_url_from_link( + source_link, link_is_in_wheel_cache=cache_entry.persistent + ) super().__init__( link=link, diff --git a/src/pip/_internal/wheel_builder.py b/src/pip/_internal/wheel_builder.py index d0663443b22..77a17ff0f15 100644 --- a/src/pip/_internal/wheel_builder.py +++ b/src/pip/_internal/wheel_builder.py @@ -354,6 +354,12 @@ def build( req.editable and req.permit_editable_wheels, ) if wheel_file: + # Record the download origin in the cache + if req.download_info is not None: + # download_info is guaranteed to be set because when we build an + # InstallRequirement it has been through the preparer before, but + # let's be cautious. + wheel_cache.record_download_origin(cache_dir, req.download_info) # Update the link for this. req.link = Link(path_to_url(wheel_file)) req.local_file_path = req.link.file_path diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index bec8b72fc96..3bf2579ed4b 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -1550,9 +1550,9 @@ def test_install_builds_wheels(script: PipTestEnvironment, data: TestData) -> No ) # Must have installed it all assert expected in str(res), str(res) - wheels = [] + wheels: List[str] = [] for _, _, files in os.walk(wheels_cache): - wheels.extend(files) + wheels.extend(f for f in files if f.endswith(".whl")) # and built wheels for upper and wheelbroken assert "Building wheel for upper" in str(res), str(res) assert "Building wheel for wheelb" in str(res), str(res) diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index ac68fa4df57..18932bb344d 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -5,13 +5,14 @@ import sys import tempfile from functools import partial -from typing import Iterator, Tuple, cast +from typing import Iterator, Optional, Tuple, cast from unittest import mock import pytest from pip._vendor.packaging.markers import Marker from pip._vendor.packaging.requirements 
import Requirement +from pip._internal.cache import WheelCache from pip._internal.commands import create_command from pip._internal.commands.install import InstallCommand from pip._internal.exceptions import ( @@ -22,6 +23,9 @@ ) from pip._internal.index.package_finder import PackageFinder from pip._internal.metadata import select_backend +from pip._internal.models.direct_url import ArchiveInfo, DirectUrl, DirInfo, VcsInfo +from pip._internal.models.format_control import FormatControl +from pip._internal.models.link import Link from pip._internal.network.session import PipSession from pip._internal.operations.build.build_tracker import get_build_tracker from pip._internal.operations.prepare import RequirementPreparer @@ -42,7 +46,7 @@ ) from pip._internal.resolution.legacy.resolver import Resolver from pip._internal.utils.urls import path_to_url -from tests.lib import TestData, make_test_finder, requirements_file +from tests.lib import TestData, make_test_finder, requirements_file, wheel from tests.lib.path import Path @@ -76,7 +80,10 @@ def teardown(self) -> None: @contextlib.contextmanager def _basic_resolver( - self, finder: PackageFinder, require_hashes: bool = False + self, + finder: PackageFinder, + require_hashes: bool = False, + wheel_cache: Optional[WheelCache] = None, ) -> Iterator[Resolver]: make_install_req = partial( install_req_from_req_string, @@ -105,7 +112,7 @@ def _basic_resolver( preparer=preparer, make_install_req=make_install_req, finder=finder, - wheel_cache=None, + wheel_cache=wheel_cache, use_user_site=False, upgrade_strategy="to-satisfy-only", ignore_dependencies=False, @@ -342,6 +349,161 @@ def test_hashed_deps_on_require_hashes(self) -> None: ) ) + def test_download_info_find_links(self, data: TestData) -> None: + """Test that download_info is set for requirements via find_links.""" + finder = make_test_finder(find_links=[data.find_links]) + with self._basic_resolver(finder) as resolver: + ireq = get_processed_req_from_line("simple") + 
reqset = resolver.resolve([ireq], True) + assert len(reqset.all_requirements) == 1 + req = reqset.all_requirements[0] + assert req.download_info + assert isinstance(req.download_info.info, ArchiveInfo) + assert req.download_info.info.hash + + @pytest.mark.network + def test_download_info_index_url(self) -> None: + """Test that download_info is set for requirements via index.""" + finder = make_test_finder(index_urls=["https://pypi.org/simple"]) + with self._basic_resolver(finder) as resolver: + ireq = get_processed_req_from_line("initools") + reqset = resolver.resolve([ireq], True) + assert len(reqset.all_requirements) == 1 + req = reqset.all_requirements[0] + assert req.download_info + assert isinstance(req.download_info.info, ArchiveInfo) + + @pytest.mark.network + def test_download_info_web_archive(self) -> None: + """Test that download_info is set for requirements from a web archive.""" + finder = make_test_finder() + with self._basic_resolver(finder) as resolver: + ireq = get_processed_req_from_line( + "pip-test-package @ " + "https://github.com/pypa/pip-test-package/tarball/0.1.1" + ) + reqset = resolver.resolve([ireq], True) + assert len(reqset.all_requirements) == 1 + req = reqset.all_requirements[0] + assert req.download_info + assert ( + req.download_info.url + == "https://github.com/pypa/pip-test-package/tarball/0.1.1" + ) + assert isinstance(req.download_info.info, ArchiveInfo) + assert ( + req.download_info.info.hash == "sha256=" + "ad977496000576e1b6c41f6449a9897087ce9da6db4f15b603fe8372af4bf3c6" + ) + + def test_download_info_archive_legacy_cache( + self, tmp_path: Path, shared_data: TestData + ) -> None: + """Test download_info hash is not set for an archive with legacy cache entry.""" + url = path_to_url(shared_data.packages / "simple-1.0.tar.gz") + finder = make_test_finder() + wheel_cache = WheelCache(str(tmp_path / "cache"), FormatControl()) + cache_entry_dir = wheel_cache.get_path_for_link(Link(url)) + Path(cache_entry_dir).mkdir(parents=True) 
+ wheel.make_wheel(name="simple", version="1.0").save_to_dir(cache_entry_dir) + with self._basic_resolver(finder, wheel_cache=wheel_cache) as resolver: + ireq = get_processed_req_from_line(f"simple @ {url}") + reqset = resolver.resolve([ireq], True) + assert len(reqset.all_requirements) == 1 + req = reqset.all_requirements[0] + assert req.original_link_is_in_wheel_cache + assert req.download_info + assert req.download_info.url == url + assert isinstance(req.download_info.info, ArchiveInfo) + assert not req.download_info.info.hash + + def test_download_info_archive_cache_with_origin( + self, tmp_path: Path, shared_data: TestData + ) -> None: + """Test download_info hash is set for a web archive with cache entry + that has origin.json.""" + url = path_to_url(shared_data.packages / "simple-1.0.tar.gz") + hash = "sha256=ad977496000576e1b6c41f6449a9897087ce9da6db4f15b603fe8372af4bf3c6" + finder = make_test_finder() + wheel_cache = WheelCache(str(tmp_path / "cache"), FormatControl()) + cache_entry_dir = wheel_cache.get_path_for_link(Link(url)) + Path(cache_entry_dir).mkdir(parents=True) + Path(cache_entry_dir).joinpath("origin.json").write_text( + DirectUrl(url, ArchiveInfo(hash=hash)).to_json() + ) + wheel.make_wheel(name="simple", version="1.0").save_to_dir(cache_entry_dir) + with self._basic_resolver(finder, wheel_cache=wheel_cache) as resolver: + ireq = get_processed_req_from_line(f"simple @ {url}") + reqset = resolver.resolve([ireq], True) + assert len(reqset.all_requirements) == 1 + req = reqset.all_requirements[0] + assert req.original_link_is_in_wheel_cache + assert req.download_info + assert req.download_info.url == url + assert isinstance(req.download_info.info, ArchiveInfo) + assert req.download_info.info.hash == hash + + def test_download_info_local_wheel(self, data: TestData) -> None: + """Test that download_info is set for requirements from a local wheel.""" + finder = make_test_finder() + with self._basic_resolver(finder) as resolver: + ireq = 
get_processed_req_from_line( + f"{data.packages}/simplewheel-1.0-py2.py3-none-any.whl" + ) + reqset = resolver.resolve([ireq], True) + assert len(reqset.all_requirements) == 1 + req = reqset.all_requirements[0] + assert req.download_info + assert req.download_info.url.startswith("file://") + assert isinstance(req.download_info.info, ArchiveInfo) + assert ( + req.download_info.info.hash == "sha256=" + "e63aa139caee941ec7f33f057a5b987708c2128238357cf905429846a2008718" + ) + + def test_download_info_local_dir(self, data: TestData) -> None: + """Test that download_info is set for requirements from a local dir.""" + finder = make_test_finder() + with self._basic_resolver(finder) as resolver: + ireq_url = path_to_url(data.packages / "FSPkg") + ireq = get_processed_req_from_line(f"FSPkg @ {ireq_url}") + reqset = resolver.resolve([ireq], True) + assert len(reqset.all_requirements) == 1 + req = reqset.all_requirements[0] + assert req.download_info + assert req.download_info.url.startswith("file://") + assert isinstance(req.download_info.info, DirInfo) + + def test_download_info_local_editable_dir(self, data: TestData) -> None: + """Test that download_info is set for requirements from a local editable dir.""" + finder = make_test_finder() + with self._basic_resolver(finder) as resolver: + ireq_url = path_to_url(data.packages / "FSPkg") + ireq = get_processed_req_from_line(f"-e {ireq_url}#egg=FSPkg") + reqset = resolver.resolve([ireq], True) + assert len(reqset.all_requirements) == 1 + req = reqset.all_requirements[0] + assert req.download_info + assert req.download_info.url.startswith("file://") + assert isinstance(req.download_info.info, DirInfo) + assert req.download_info.info.editable + + @pytest.mark.network + def test_download_info_vcs(self) -> None: + """Test that download_info is set for requirements from git.""" + finder = make_test_finder() + with self._basic_resolver(finder) as resolver: + ireq = get_processed_req_from_line( + "pip-test-package @ 
git+https://github.com/pypa/pip-test-package" + ) + reqset = resolver.resolve([ireq], True) + assert len(reqset.all_requirements) == 1 + req = reqset.all_requirements[0] + assert req.download_info + assert isinstance(req.download_info.info, VcsInfo) + assert req.download_info.url == "https://github.com/pypa/pip-test-package" + assert req.download_info.info.vcs == "git" + class TestInstallRequirement: def setup(self) -> None: