From 26fbfd6725d36e94a4f85737be0ef65b945f21ce Mon Sep 17 00:00:00 2001 From: quantum-byte Date: Tue, 7 Mar 2023 17:14:45 +0100 Subject: [PATCH] installer: fix handling of sdist hashes (#7594) * fix hash mismatch during installation with `no-binary` option for packages that provide suitable wheels * do not skip hash check for sdists * write hash for sdists in direct_url.json --- src/poetry/installation/chef.py | 9 +- src/poetry/installation/executor.py | 21 ++- tests/installation/test_chef.py | 83 +++++++++++- tests/installation/test_executor.py | 200 ++++++++++++++++++++++------ 4 files changed, 262 insertions(+), 51 deletions(-) diff --git a/src/poetry/installation/chef.py b/src/poetry/installation/chef.py index 0115e439200..6eb1ae3f21b 100644 --- a/src/poetry/installation/chef.py +++ b/src/poetry/installation/chef.py @@ -101,7 +101,6 @@ def prepare( if archive.is_dir(): tmp_dir = tempfile.mkdtemp(prefix="poetry-chef-") - return self._prepare(archive, Path(tmp_dir), editable=editable) return self._prepare_sdist(archive, destination=output_dir) @@ -198,13 +197,19 @@ def _should_prepare(self, archive: Path) -> bool: def _is_wheel(cls, archive: Path) -> bool: return archive.suffix == ".whl" - def get_cached_archive_for_link(self, link: Link) -> Path | None: + def get_cached_archive_for_link(self, link: Link, *, strict: bool) -> Path | None: archives = self.get_cached_archives_for_link(link) if not archives: return None candidates: list[tuple[float | None, Path]] = [] for archive in archives: + if strict: + # in strict mode return the original cached archive instead of the + # prioritized archive type. + if link.filename == archive.name: + return archive + continue if archive.suffix != ".whl": candidates.append((float("inf"), archive)) continue diff --git a/src/poetry/installation/executor.py b/src/poetry/installation/executor.py index 22972882fcb..70e61a2cea6 100644 --- a/src/poetry/installation/executor.py +++ b/src/poetry/installation/executor.py @@ -693,11 +693,12 @@ def _download_link(self, operation: Install | Update, link: Link) -> Path: package = operation.package output_dir = self._chef.get_cache_directory_for_link(link) - archive = self._chef.get_cached_archive_for_link(link) - if archive is None: - # No cached distributions was found, so we download and prepare it + # Try to get cached original package for the link provided + original_archive = self._chef.get_cached_archive_for_link(link, strict=True) + if original_archive is None: + # No cached original distributions was found, so we download and prepare it try: - archive = self._download_archive(operation, link) + original_archive = self._download_archive(operation, link) except BaseException: cache_directory = self._chef.get_cache_directory_for_link(link) cached_file = cache_directory.joinpath(link.filename) @@ -708,6 +709,13 @@ def _download_link(self, operation: Install | Update, link: Link) -> Path: raise + # Get potential higher prioritized cached archive, otherwise it will fall back + # to the original archive. + archive = self._chef.get_cached_archive_for_link(link, strict=False) + # 'archive' can at this point never be None. Since we previously downloaded + # an archive, we now should have something cached that we can use here + assert archive is not None + if archive.suffix != ".whl": message = ( f" • {self.get_operation_message(operation)}:" @@ -717,7 +725,8 @@ def _download_link(self, operation: Install | Update, link: Link) -> Path: archive = self._chef.prepare(archive, output_dir=output_dir) - self._populate_hashes_dict(archive, package) + # Use the original archive to provide the correct hash. + self._populate_hashes_dict(original_archive, package) return archive @@ -729,7 +738,7 @@ def _populate_hashes_dict(self, archive: Path, package: Package) -> None: @staticmethod def _validate_archive_hash(archive: Path, package: Package) -> str: archive_hash: str = "sha256:" + get_file_hash(archive) - known_hashes = {f["hash"] for f in package.files} + known_hashes = {f["hash"] for f in package.files if f["file"] == archive.name} if archive_hash not in known_hashes: raise RuntimeError( diff --git a/tests/installation/test_chef.py b/tests/installation/test_chef.py index e9046c5433f..19283cdeaaa 100644 --- a/tests/installation/test_chef.py +++ b/tests/installation/test_chef.py @@ -1,5 +1,8 @@ from __future__ import annotations +import os +import tempfile + from pathlib import Path from typing import TYPE_CHECKING from zipfile import ZipFile @@ -38,20 +41,80 @@ def setup(mocker: MockerFixture, pool: RepositoryPool) -> None: @pytest.mark.parametrize( - ("link", "cached"), + ("link", "strict", "available_packages"), + [ + ( + "https://files.python-poetry.org/demo-0.1.0.tar.gz", + True, + [ + Path("/cache/demo-0.1.0-py2.py3-none-any"), + Path("/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl"), + Path("/cache/demo-0.1.0-cp37-cp37-macosx_10_15_x86_64.whl"), + ], + ), + ( + "https://example.com/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", + False, + [], + ), + ], +) +def test_get_not_found_cached_archive_for_link( + config: Config, + mocker: MockerFixture, + link: str, + strict: bool, + available_packages: list[Path], +): + chef = Chef( + config, + MockEnv( + version_info=(3, 8, 3), + marker_env={"interpreter_name": "cpython", "interpreter_version": "3.8.3"}, + supported_tags=[ + Tag("cp38", "cp38", "macosx_10_15_x86_64"), + Tag("py3", "none", "any"), + ], + ), + Factory.create_pool(config), + ) + + mocker.patch.object( + chef, "get_cached_archives_for_link", return_value=available_packages + ) + + archive = chef.get_cached_archive_for_link(Link(link), strict=strict) + + assert archive is None + + +@pytest.mark.parametrize( + ("link", "cached", "strict"), [ ( "https://files.python-poetry.org/demo-0.1.0.tar.gz", "/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", + False, ), ( "https://example.com/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", "/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", + False, + ), + ( + "https://files.python-poetry.org/demo-0.1.0.tar.gz", + "/cache/demo-0.1.0.tar.gz", + True, + ), + ( + "https://example.com/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", + "/cache/demo-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl", + True, ), ], ) -def test_get_cached_archive_for_link( - config: Config, mocker: MockerFixture, link: str, cached: str +def test_get_found_cached_archive_for_link( + config: Config, mocker: MockerFixture, link: str, cached: str, strict: bool ): chef = Chef( config, @@ -77,7 +140,7 @@ def test_get_cached_archive_for_link( ], ) - archive = chef.get_cached_archive_for_link(Link(link)) + archive = chef.get_cached_archive_for_link(Link(link), strict=strict) assert Path(cached) == archive @@ -153,6 +216,10 @@ def test_prepare_directory(config: Config, config_cache_dir: Path): assert wheel.name == "simple_project-1.2.3-py2.py3-none-any.whl" + assert wheel.parent.parent == Path(tempfile.gettempdir()) + # cleanup generated tmp dir artifact + os.unlink(wheel) + def test_prepare_directory_with_extensions( config: Config, config_cache_dir: Path @@ -168,8 +235,12 @@ def test_prepare_directory_with_extensions( wheel = chef.prepare(archive) + assert wheel.parent.parent == Path(tempfile.gettempdir()) assert wheel.name == f"extended-0.1-{env.supported_tags[0]}.whl" + # cleanup generated tmp dir artifact + os.unlink(wheel) + def test_prepare_directory_editable(config: Config, config_cache_dir: Path): chef = Chef(config, EnvManager.get_system_env(), Factory.create_pool(config)) @@ -178,7 +249,11 @@ def test_prepare_directory_editable(config: Config, config_cache_dir: Path): wheel = chef.prepare(archive, editable=True) + assert wheel.parent.parent == Path(tempfile.gettempdir()) assert wheel.name == "simple_project-1.2.3-py2.py3-none-any.whl" with ZipFile(wheel) as z: assert "simple_project.pth" in z.namelist() + + # cleanup generated tmp dir artifact + os.unlink(wheel) diff --git a/tests/installation/test_executor.py b/tests/installation/test_executor.py index 34f397e6bd4..578b270354b 100644 --- a/tests/installation/test_executor.py +++ b/tests/installation/test_executor.py @@ -139,9 +139,14 @@ def callback( ) if not fixture.exists(): - fixture = Path(__file__).parent.parent.joinpath( - "fixtures/distributions/demo-0.1.0-py2.py3-none-any.whl" - ) + if name == "demo-0.1.0.tar.gz": + fixture = Path(__file__).parent.parent.joinpath( + "fixtures/distributions/demo-0.1.0.tar.gz" + ) + else: + fixture = Path(__file__).parent.parent.joinpath( + "fixtures/distributions/demo-0.1.0-py2.py3-none-any.whl" + ) return [200, headers, fixture.read_bytes()] @@ -533,7 +538,7 @@ def test_executor_should_delete_incomplete_downloads( ) mocker.patch( "poetry.installation.chef.Chef.get_cached_archive_for_link", - side_effect=lambda link: None, + side_effect=lambda link, strict: None, ) mocker.patch( "poetry.installation.chef.Chef.get_cache_directory_for_link", @@ -601,6 +606,12 @@ def test_executor_should_not_write_pep610_url_references_for_cached_package( io: BufferedIO, ): link_cached = fixture_dir("distributions") / "demo-0.1.0-py2.py3-none-any.whl" + package.files = [ + { + "file": "demo-0.1.0-py2.py3-none-any.whl", + "hash": "sha256:70e704135718fffbcbf61ed1fc45933cfd86951a744b681000eaaa75da31f17a", # noqa: E501 + } + ] mocker.patch( "poetry.installation.executor.Executor._download", return_value=link_cached @@ -611,7 +622,7 @@ def test_executor_should_not_write_pep610_url_references_for_cached_package( verify_installed_distribution(tmp_venv, package) -def test_executor_should_write_pep610_url_references_for_files( +def test_executor_should_write_pep610_url_references_for_wheel_files( tmp_venv: VirtualEnv, pool: RepositoryPool, config: Config, io: BufferedIO ): url = ( @@ -645,6 +656,38 @@ def test_executor_should_write_pep610_url_references_for_files( verify_installed_distribution(tmp_venv, package, expected_url_reference) +def test_executor_should_write_pep610_url_references_for_non_wheel_files( + tmp_venv: VirtualEnv, pool: RepositoryPool, config: Config, io: BufferedIO +): + url = ( + Path(__file__) + .parent.parent.joinpath("fixtures/distributions/demo-0.1.0.tar.gz") + .resolve() + ) + package = Package("demo", "0.1.0", source_type="file", source_url=url.as_posix()) + # Set package.files so the executor will attempt to hash the package + package.files = [ + { + "file": "demo-0.1.0.tar.gz", + "hash": "sha256:9fa123ad707a5c6c944743bf3e11a0e80d86cb518d3cf25320866ca3ef43e2ad", # noqa: E501 + } + ] + + executor = Executor(tmp_venv, pool, config, io) + executor.execute([Install(package)]) + expected_url_reference = { + "archive_info": { + "hashes": { + "sha256": ( + "9fa123ad707a5c6c944743bf3e11a0e80d86cb518d3cf25320866ca3ef43e2ad" + ) + }, + }, + "url": url.as_uri(), + } + verify_installed_distribution(tmp_venv, package, expected_url_reference) + + def test_executor_should_write_pep610_url_references_for_directories( tmp_venv: VirtualEnv, pool: RepositoryPool, @@ -703,13 +746,25 @@ def test_executor_should_write_pep610_url_references_for_editable_directories( ) -def test_executor_should_write_pep610_url_references_for_urls( +@pytest.mark.parametrize("is_artifact_cached", [False, True]) +def test_executor_should_write_pep610_url_references_for_wheel_urls( tmp_venv: VirtualEnv, pool: RepositoryPool, config: Config, io: BufferedIO, mock_file_downloads: None, + mocker: MockerFixture, + fixture_dir: FixtureDirGetter, + is_artifact_cached: bool, ): + if is_artifact_cached: + link_cached = fixture_dir("distributions") / "demo-0.1.0-py2.py3-none-any.whl" + mocker.patch( + "poetry.installation.chef.Chef.get_cached_archive_for_link", + return_value=link_cached, + ) + download_spy = mocker.spy(Executor, "_download_archive") + package = Package( "demo", "0.1.0", @@ -725,7 +780,8 @@ def test_executor_should_write_pep610_url_references_for_urls( ] executor = Executor(tmp_venv, pool, config, io) - executor.execute([Install(package)]) + operation = Install(package) + executor.execute([operation]) expected_url_reference = { "archive_info": { "hashes": { @@ -737,6 +793,104 @@ def test_executor_should_write_pep610_url_references_for_urls( "url": package.source_url, } verify_installed_distribution(tmp_venv, package, expected_url_reference) + if is_artifact_cached: + download_spy.assert_not_called() + else: + download_spy.assert_called_once_with( + mocker.ANY, operation, Link(package.source_url) + ) + + +@pytest.mark.parametrize( + ( + "is_sdist_cached", + "is_wheel_cached", + "expect_artifact_building", + "expect_artifact_download", + ), + [ + (True, False, True, False), + (True, True, False, False), + (False, False, True, True), + (False, True, False, True), + ], +) +def test_executor_should_write_pep610_url_references_for_non_wheel_urls( + tmp_venv: VirtualEnv, + pool: RepositoryPool, + config: Config, + io: BufferedIO, + mock_file_downloads: None, + mocker: MockerFixture, + fixture_dir: FixtureDirGetter, + is_sdist_cached: bool, + is_wheel_cached: bool, + expect_artifact_building: bool, + expect_artifact_download: bool, +): + built_wheel = fixture_dir("distributions") / "demo-0.1.0-py2.py3-none-any.whl" + mock_prepare = mocker.patch( + "poetry.installation.chef.Chef._prepare", + return_value=built_wheel, + ) + download_spy = mocker.spy(Executor, "_download_archive") + + if is_sdist_cached | is_wheel_cached: + cached_sdist = fixture_dir("distributions") / "demo-0.1.0.tar.gz" + cached_wheel = fixture_dir("distributions") / "demo-0.1.0-py2.py3-none-any.whl" + + def mock_get_cached_archive_for_link_func(_: Link, strict: bool): + if is_wheel_cached and not strict: + return cached_wheel + if is_sdist_cached: + return cached_sdist + return None + + mocker.patch( + "poetry.installation.chef.Chef.get_cached_archive_for_link", + side_effect=mock_get_cached_archive_for_link_func, + ) + + package = Package( + "demo", + "0.1.0", + source_type="url", + source_url="https://files.pythonhosted.org/demo-0.1.0.tar.gz", + ) + # Set package.files so the executor will attempt to hash the package + package.files = [ + { + "file": "demo-0.1.0.tar.gz", + "hash": "sha256:9fa123ad707a5c6c944743bf3e11a0e80d86cb518d3cf25320866ca3ef43e2ad", # noqa: E501 + } + ] + + executor = Executor(tmp_venv, pool, config, io) + operation = Install(package) + executor.execute([operation]) + expected_url_reference = { + "archive_info": { + "hashes": { + "sha256": ( + "9fa123ad707a5c6c944743bf3e11a0e80d86cb518d3cf25320866ca3ef43e2ad" + ) + }, + }, + "url": package.source_url, + } + verify_installed_distribution(tmp_venv, package, expected_url_reference) + + if expect_artifact_building: + mock_prepare.assert_called_once() + else: + mock_prepare.assert_not_called() + + if expect_artifact_download: + download_spy.assert_called_once_with( + mocker.ANY, operation, Link(package.source_url) + ) + else: + download_spy.assert_not_called() def test_executor_should_write_pep610_url_references_for_git( @@ -846,38 +1000,6 @@ def test_executor_should_write_pep610_url_references_for_git_with_subdirectories ) -def test_executor_should_use_cached_link_and_hash( - tmp_venv: VirtualEnv, - pool: RepositoryPool, - config: Config, - io: BufferedIO, - mocker: MockerFixture, - fixture_dir: FixtureDirGetter, -): - link_cached = fixture_dir("distributions") / "demo-0.1.0-py2.py3-none-any.whl" - - mocker.patch( - "poetry.installation.chef.Chef.get_cached_archive_for_link", - return_value=link_cached, - ) - - package = Package("demo", "0.1.0") - # Set package.files so the executor will attempt to hash the package - package.files = [ - { - "file": "demo-0.1.0-py2.py3-none-any.whl", - "hash": "sha256:70e704135718fffbcbf61ed1fc45933cfd86951a744b681000eaaa75da31f17a", # noqa: E501 - } - ] - - executor = Executor(tmp_venv, pool, config, io) - archive = executor._download_link( - Install(package), - Link("https://example.com/demo-0.1.0-py2.py3-none-any.whl"), - ) - assert archive == link_cached - - @pytest.mark.parametrize( ("max_workers", "cpu_count", "side_effect", "expected_workers"), [