From 02d6c712f9888dce952c9a82e0c69d0230cc94e5 Mon Sep 17 00:00:00 2001 From: Ben Alkov Date: Wed, 21 Feb 2024 17:18:39 -0500 Subject: [PATCH] toil(package managers:pip): refactor... - `_download_dependencies()` - `_process_package_distributions()` - `DistributionPackageInfo` Also adjust unit tests Signed-off-by: Ben Alkov --- cachi2/core/package_managers/pip.py | 279 ++++++++++++------ .../test_data/pip_multiple/bom.json | 176 ----------- .../test_data/pip_with_deps/bom.json | 4 - tests/unit/package_managers/test_pip.py | 261 +++++++++------- 4 files changed, 332 insertions(+), 388 deletions(-) diff --git a/cachi2/core/package_managers/pip.py b/cachi2/core/package_managers/pip.py index b96fac791..1f5e2776b 100644 --- a/cachi2/core/package_managers/pip.py +++ b/cachi2/core/package_managers/pip.py @@ -122,7 +122,7 @@ def _determine_checksums_to_verify(self) -> set[ChecksumInfo]: log.debug("%s: %s", self.path.name, msg) return checksums - def should_download_wheel(self) -> bool: + def should_download(self) -> bool: """Determine if this artifact should be downloaded. If the user specified any checksums, but they do not match with those @@ -1453,8 +1453,110 @@ def _split_hashes_from_options(cls, options: list[str]) -> tuple[list[str], list return hashes, reduced_options +def _process_req( + req: PipRequirement, + requirements_file: PipRequirementsFile, + require_hashes: bool, + pip_deps_dir: RootedPath, + download_info: dict[str, Any], + dpi: Optional[DistributionPackageInfo] = None, +) -> dict[str, Any]: + download_info["kind"] = req.kind + download_info["requirement_file"] = str(requirements_file.file_path.subpath_from_root) + download_info["hash_verified"] = False + + def _hash_verify(path: Path, checksum_info: Iterable[ChecksumInfo]) -> bool: + verified: bool = False + try: + must_match_any_checksum(path, checksum_info) + verified = True + except PackageRejected: + path.unlink() + log.warning("Download '%s' was removed from the output directory", path.name) + return verified + + if dpi: + if dpi.should_verify_checksums(): + download_info["hash_verified"] = _hash_verify(dpi.path, dpi.checksums_to_verify) + + if dpi.package_type == "sdist": + _check_metadata_in_sdist(dpi.path) + + else: + if require_hashes or req.kind == "url": + hashes = req.hashes or [req.qualifiers.get("cachito_hash", "")] + download_info["hash_verified"] = _hash_verify( + download_info["path"], list(map(_to_checksum_info, hashes)) + ) + + log.debug( + "Successfully processed '%s' in path '%s'", + req.download_line, + download_info["path"].relative_to(pip_deps_dir.root), + ) + + return download_info + + +def _process_pypi_req( + req: PipRequirement, + requirements_file: PipRequirementsFile, + require_hashes: bool, + pip_deps_dir: RootedPath, + allow_binary: bool, +) -> list[dict[str, Any]]: + download_infos: list[dict[str, Any]] = [] + + artifacts: list[DistributionPackageInfo] = _process_package_distributions( + req, pip_deps_dir, allow_binary + ) + + files: dict[str, Union[str, PathLike[str]]] = { + dpi.url: dpi.path for dpi in artifacts if not dpi.path.exists() + } + asyncio.run(async_download_files(files, get_config().concurrency_limit)) + + for artifact in artifacts: + download_infos.append( + _process_req( + req, + requirements_file, + require_hashes, + pip_deps_dir, + artifact.download_info, + dpi=artifact, + ) + ) + + return download_infos + + +def _process_vcs_req( + req: PipRequirement, pip_deps_dir: RootedPath, **kwargs: Any +) -> dict[str, Any]: + return _process_req( + req, + pip_deps_dir=pip_deps_dir, + 
download_info=_download_vcs_package(req, pip_deps_dir), + **kwargs, + ) + + +def _process_url_req( + req: PipRequirement, pip_deps_dir: RootedPath, trusted_hosts: set[str], **kwargs: Any +) -> dict[str, Any]: + return _process_req( + req, + pip_deps_dir=pip_deps_dir, + download_info=_download_url_package(req, pip_deps_dir, trusted_hosts), + **kwargs, + ) + + def _download_dependencies( - output_dir: RootedPath, requirements_file: PipRequirementsFile, allow_binary: bool = False + output_dir: RootedPath, + requirements_file: PipRequirementsFile, + allow_binary: bool = False, ) -> list[dict[str, Any]]: """ Download artifacts of all dependency packages in a requirements.txt file. @@ -1466,8 +1568,9 @@ def _download_dependencies( (and more based on kind, see _download_*_package functions for more details) :rtype: list[dict] """ - options = _process_options(requirements_file.options) + options: dict[str, Any] = _process_options(requirements_file.options) trusted_hosts = set(options["trusted_hosts"]) + processed: list[dict[str, Any]] = [] if options["require_hashes"]: log.info("Global --require-hashes option used, will require hashes") @@ -1484,77 +1587,44 @@ def _download_dependencies( _validate_requirements(requirements_file.requirements) _validate_provided_hashes(requirements_file.requirements, require_hashes) - pip_deps_dir = output_dir.join_within_root("deps", "pip") + pip_deps_dir: RootedPath = output_dir.join_within_root("deps", "pip") pip_deps_dir.path.mkdir(parents=True, exist_ok=True) - downloaded: list[dict[str, Any]] = [] - to_download: list[DistributionPackageInfo] = [] - for req in requirements_file.requirements: - log.info("Downloading %s", req.download_line) - + log.info("-- Processing requirement line '%s'", req.download_line) if req.kind == "pypi": - source, wheels = _process_package_distributions(req, pip_deps_dir, allow_binary) - if allow_binary: - # check if artifact already exists locally - to_download.extend(w for w in wheels if not w.path.exists()) - - if source is None: - # at least one wheel exists -> report in the SBOM - downloaded.append( - { - "package": req.package, - "version": req.version_specs[0][1], - "kind": req.kind, - "hash_verified": require_hashes, - "requirement_file": str(requirements_file.file_path.subpath_from_root), - } - ) - continue - - download_binary_file(source.url, source.path, auth=None) - _check_metadata_in_sdist(source.path) - download_info = source.download_info - + download_infos: list[dict[str, Any]] = _process_pypi_req( + req, + requirements_file=requirements_file, + require_hashes=require_hashes, + pip_deps_dir=pip_deps_dir, + allow_binary=allow_binary, + ) + processed.extend(download_infos) elif req.kind == "vcs": - download_info = _download_vcs_package(req, pip_deps_dir) + download_info = _process_vcs_req( + req, + requirements_file=requirements_file, + require_hashes=require_hashes, + pip_deps_dir=pip_deps_dir, + ) + processed.append(download_info) elif req.kind == "url": - download_info = _download_url_package(req, pip_deps_dir, trusted_hosts) + download_info = _process_url_req( + req, + requirements_file=requirements_file, + require_hashes=require_hashes, + pip_deps_dir=pip_deps_dir, + trusted_hosts=trusted_hosts, + ) + processed.append(download_info) else: # Should not happen - raise RuntimeError(f"Unexpected requirement kind: {req.kind!r}") - - log.info( - "Successfully downloaded %s to %s", - req.download_line, - download_info["path"].relative_to(output_dir), - ) - - if require_hashes or req.kind == "url": - hashes = req.hashes or 
[req.qualifiers["cachito_hash"]]
-            must_match_any_checksum(download_info["path"], list(map(_to_checksum_info, hashes)))
-            download_info["hash_verified"] = True
-        else:
-            download_info["hash_verified"] = False
-
-        download_info["kind"] = req.kind
-        download_info["requirement_file"] = str(requirements_file.file_path.subpath_from_root)
-        downloaded.append(download_info)
-
-    if allow_binary:
-        log.info("Downloading %d wheel(s) ...", len(to_download))
-        files: dict[str, Union[str, PathLike[str]]] = {pkg.url: pkg.path for pkg in to_download}
-        asyncio.run(async_download_files(files, get_config().concurrency_limit))
+            raise RuntimeError(f"Unexpected requirement kind: {req.kind!r}")
 
-        for pkg in to_download:
-            try:
-                if pkg.should_verify_checksums():
-                    must_match_any_checksum(pkg.path, pkg.checksums_to_verify)
-            except PackageRejected:
-                pkg.path.unlink()
-                log.warning("Download '%s' was removed from the output directory", pkg.path.name)
+        log.info("-- Finished processing requirement line '%s'\n", req.download_line)
 
-    return downloaded
+    return processed
 
 
 def _process_options(options: list[str]) -> dict[str, Any]:
@@ -1741,51 +1811,62 @@ def _validate_provided_hashes(requirements: list[PipRequirement], require_hashes
 def _process_package_distributions(
     requirement: PipRequirement, pip_deps_dir: RootedPath, allow_binary: bool = False
-) -> tuple[Optional[DistributionPackageInfo], list[DistributionPackageInfo]]:
+) -> list[DistributionPackageInfo]:
     """
-    Return a DistributionPackageInfo object | a list of DPI objects, for the provided pip package.
+    Return a list of DPI objects for the provided pip package.
 
     Scrape the package's PyPI page and generate a list of all available
-    artifacts.
-    Filter by version and allowed artifact type.
-    Filter to find the best matching sdist artifact.
-    Process wheel artifacts.
+    artifacts. Filter by version and allowed artifact type. Filter to find the
+    best matching sdist artifact. Process wheel artifacts.
+
+    A note on nomenclature (to address a common misconception):
+
+    To strictly follow Python packaging terminology, we should avoid "source",
+    since it is an overloaded term; rather than talking about "source" vs.
+    "wheel", we should say "sdist" vs. "wheel" instead.
+    _sdist_ - "source distribution": a tarball (usually) of a project's entire repo
+    _wheel_ - built distribution: a stripped-down version of an sdist, containing
+    **only** the Python modules and application data needed to
+    install and run the application (note that it need not, and commonly
+    won't, include Python bytecode modules, *.pyc)
 
     :param requirement: which pip package to process
     :param str pip_deps_dir:
     :param bool allow_binary: process wheels?
- :return: a single DistributionPackageInfo, or a list of DPI - :rtype: DistributionPackageInfo + :return: a list of DPI + :rtype: list[DistributionPackageInfo] """ + allowed_distros = ["sdist", "wheel"] if allow_binary else ["sdist"] + client = pypi_simple.PyPISimple() + processed_dpis: list[DistributionPackageInfo] = [] name = requirement.package version = requirement.version_specs[0][1] normalized_version = canonicalize_version(version) + sdists: list[DistributionPackageInfo] = [] + user_checksums = set(map(_to_checksum_info, requirement.hashes)) + wheels: list[DistributionPackageInfo] = [] - client = pypi_simple.PyPISimple() try: timeout = get_config().requests_timeout project_page = client.get_project_page(name, timeout) - packages = project_page.packages + packages: list[pypi_simple.DistributionPackage] = project_page.packages except (requests.RequestException, pypi_simple.NoSuchProjectError) as e: raise FetchError(f"PyPI query failed: {e}") - allowed_distros = ["sdist", "wheel"] if allow_binary else ["sdist"] - filtered_packages = filter( - lambda x: x.version is not None - and canonicalize_version(x.version) == normalized_version - and x.package_type is not None - and x.package_type in allowed_distros, - packages, - ) - - sdists: list[DistributionPackageInfo] = [] - wheels: list[DistributionPackageInfo] = [] + def _is_valid(pkg: pypi_simple.DistributionPackage) -> bool: + return ( + pkg.version is not None + and canonicalize_version(pkg.version) == normalized_version + and pkg.package_type is not None + and pkg.package_type in allowed_distros + ) - user_checksums = set(map(_to_checksum_info, requirement.hashes)) + for package in packages: + if not _is_valid(package): + continue - for package in filtered_packages: - pypi_checksums = { + pypi_checksums: set[ChecksumInfo] = { ChecksumInfo(algorithm, digest) for algorithm, digest in package.digests.items() } @@ -1800,15 +1881,15 @@ def _process_package_distributions( user_checksums, ) - if dpi.package_type == "sdist": - sdists.append(dpi) - else: - if dpi.should_download_wheel(): - wheels.append(dpi) + if dpi.should_download(): + if dpi.package_type == "sdist": + sdists.append(dpi) else: - log.info("Filtering out %s due to checksum mismatch", package.filename) + wheels.append(dpi) + else: + log.info("Filtering out %s due to checksum mismatch", package.filename) - if len(sdists) != 0: + if sdists: best_sdist = max(sdists, key=_sdist_preference) if best_sdist.is_yanked: raise PackageRejected( @@ -1820,9 +1901,10 @@ def _process_package_distributions( "Otherwise, you may need to pin to the previous version." ), ) + if best_sdist: + processed_dpis.append(best_sdist) else: log.warning("No sdist found for package %s==%s", name, version) - best_sdist = None if len(wheels) == 0: if allow_binary: @@ -1840,14 +1922,15 @@ def _process_package_distributions( "Alternatively, allow the use of wheels." 
) docs = PIP_NO_SDIST_DOC - raise PackageRejected( f"No distributions found for package {name}=={version}", solution=solution, docs=docs, ) - return best_sdist, wheels + processed_dpis.extend(wheels) + + return processed_dpis def _sdist_preference(sdist_pkg: DistributionPackageInfo) -> tuple[int, int]: diff --git a/tests/integration/test_data/pip_multiple/bom.json b/tests/integration/test_data/pip_multiple/bom.json index 44afc8e62..bf32615b9 100644 --- a/tests/integration/test_data/pip_multiple/bom.json +++ b/tests/integration/test_data/pip_multiple/bom.json @@ -7,10 +7,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/aiowsgi@0.7", @@ -49,10 +45,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/async-timeout@4.0.3", @@ -65,10 +57,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/blinker@1.6.2", @@ -81,10 +69,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/certifi@2023.7.22", @@ -97,10 +81,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/cffi@1.15.1", @@ -113,10 +93,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/charset-normalizer@3.2.0", @@ -129,14 +105,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "third_pkg/requirements.txt" } ], "purl": "pkg:pypi/click@8.1.7", @@ -149,10 +117,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/cryptography@41.0.3", @@ -165,10 +129,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/dnspython@2.4.2", @@ -181,10 +141,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/ecdsa@0.18.0", @@ -209,10 +165,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/flask-cors@4.0.0", @@ -225,10 +177,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/flask@2.3.3", @@ -241,10 +189,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/future@0.18.3", @@ -257,10 +201,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/futures@3.0.5", @@ -273,10 +213,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", 
- "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/gevent@23.7.0", @@ -289,10 +225,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/greenlet@2.0.2", @@ -305,10 +237,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/gunicorn@21.2.0", @@ -321,10 +249,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/idna@3.4", @@ -337,10 +261,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/importlib-metadata@6.8.0", @@ -353,10 +273,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/itsdangerous@2.1.2", @@ -369,10 +285,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/jinja2@3.1.2", @@ -385,10 +297,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/jsonpatch@1.33", @@ -401,10 +309,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/jsonpointer@2.4", @@ -417,10 +321,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/markupsafe@2.1.3", @@ -433,10 +333,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/packaging@23.1", @@ -449,10 +345,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/pathspec@0.11.2", @@ -465,10 +357,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/pycparser@2.21", @@ -481,10 +369,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/pyopenssl@23.2.0", @@ -497,10 +381,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/python-etcd@0.4.5", @@ -513,10 +393,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/pyyaml@6.0.1", @@ -529,10 +405,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/redis@5.0.0", @@ -545,10 +417,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/requests@2.31.0", @@ -573,10 +441,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - 
"name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/semantic-version@2.10.0", @@ -589,10 +453,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/six@1.16.0", @@ -605,10 +465,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/tabulate@0.9.0", @@ -621,10 +477,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/termcolor@2.3.0", @@ -649,10 +501,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/urllib3@1.21.1", @@ -665,10 +513,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/waitress@2.1.2", @@ -681,10 +525,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/webob@1.8.7", @@ -697,10 +537,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/werkzeug@2.3.7", @@ -713,10 +549,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/zipp@3.16.2", @@ -729,10 +561,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/zope-event@5.0", @@ -745,10 +573,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "second_pkg/requirements.txt" } ], "purl": "pkg:pypi/zope-interface@6.0", diff --git a/tests/integration/test_data/pip_with_deps/bom.json b/tests/integration/test_data/pip_with_deps/bom.json index 689635b25..98cfb5dce 100644 --- a/tests/integration/test_data/pip_with_deps/bom.json +++ b/tests/integration/test_data/pip_with_deps/bom.json @@ -7,10 +7,6 @@ { "name": "cachi2:found_by", "value": "cachi2" - }, - { - "name": "cachi2:missing_hash:in_file", - "value": "requirements.txt" } ], "purl": "pkg:pypi/aiowsgi@0.7", diff --git a/tests/unit/package_managers/test_pip.py b/tests/unit/package_managers/test_pip.py index 6fe77ed8b..e1a61b60b 100644 --- a/tests/unit/package_managers/test_pip.py +++ b/tests/unit/package_managers/test_pip.py @@ -2711,11 +2711,11 @@ def test_process_existing_package_without_source_distributions( None, None, ) - source, wheels = pip._process_package_distributions( + artifacts = pip._process_package_distributions( mock_requirement, rooted_tmp_path, allow_binary=True ) - assert source is None - assert len(wheels) == 2 + assert artifacts[0].package_type != "sdist" + assert len(artifacts) == 2 assert f"No sdist found for package {package_name}=={version}" in caplog.text @pytest.mark.parametrize("allow_binary", (True, False)) @@ -2749,10 +2749,8 @@ def test_process_existing_package_without_any_distributions( ) else: assert str(exc_info.value.solution) == ( - "It seems that this version does not exist or isn't published as an" - " sdist.\n" - "Try to specify the dependency 
directly via a URL instead, for example," - " the tarball for a GitHub release.\n" + "It seems that this version does not exist or isn't published as an sdist.\n" + "Try to specify the dependency directly via a URL instead, for example, the tarball for a GitHub release.\n" "Alternatively, allow the use of wheels." ) @@ -2817,7 +2815,7 @@ def test_process_package_distributions_with_checksums( None, None, ) - _, wheels = pip._process_package_distributions( + artifacts = pip._process_package_distributions( mock_requirement, rooted_tmp_path, allow_binary=True ) @@ -2826,13 +2824,13 @@ def test_process_package_distributions_with_checksums( f"{package_name}: using intersection of user specified and PyPI reported checksums" in caplog.text ) - assert wheels[0].checksums_to_verify == set( + assert artifacts[1].checksums_to_verify == set( [ChecksumInfo("sha128", "abcdef"), ChecksumInfo("sha256", "abcdef")] ) elif use_user_hashes and not use_pypi_digests: assert f"{package_name}: using user specified checksums" in caplog.text - assert wheels[0].checksums_to_verify == set( + assert artifacts[1].checksums_to_verify == set( [ ChecksumInfo("sha128", "abcdef"), ChecksumInfo("sha256", "abcdef"), @@ -2842,7 +2840,7 @@ def test_process_package_distributions_with_checksums( elif use_pypi_digests and not use_user_hashes: assert f"{package_name}: using PyPI reported checksums" in caplog.text - assert wheels[0].checksums_to_verify == set( + assert artifacts[1].checksums_to_verify == set( [ ChecksumInfo("sha128", "abcdef"), ChecksumInfo("sha256", "abcdef"), @@ -2855,7 +2853,7 @@ def test_process_package_distributions_with_checksums( f"{package_name}: no checksums reported by PyPI or specified by the user" in caplog.text ) - assert wheels[0].checksums_to_verify == set() + assert artifacts[1].checksums_to_verify == set() @mock.patch.object(pypi_simple.PyPISimple, "get_project_page") def test_process_package_distributions_with_different_checksums( @@ -2882,11 +2880,11 @@ def test_process_package_distributions_with_different_checksums( None, ) - _, wheels = pip._process_package_distributions( + artifacts = pip._process_package_distributions( mock_requirement, rooted_tmp_path, allow_binary=True ) - assert len(wheels) == 0 + assert len(artifacts) == 1 assert f"Filtering out {package_name} due to checksum mismatch" in caplog.text @pytest.mark.parametrize( @@ -2937,10 +2935,10 @@ def test_process_package_distributions_noncanonical_version( None, ) - source, wheels = pip._process_package_distributions(mock_requirement, rooted_tmp_path) - assert source is not None - assert source.version == requested_version - assert all(w.version == requested_version for w in wheels) + artifacts = pip._process_package_distributions(mock_requirement, rooted_tmp_path) + assert artifacts[0].package_type == "sdist" + assert artifacts[0].version == requested_version + assert all(w.version == requested_version for w in artifacts[1:]) def test_sdist_sorting(self) -> None: """Test that sdist preference key can be used for sorting in the expected order.""" @@ -3305,14 +3303,14 @@ def test_malformed_hash(self, requirement_kind: str, hash_in_url: bool) -> None: @mock.patch("cachi2.core.package_managers.pip._check_metadata_in_sdist") def test_download_dependencies( self, - check_metadata_in_sdist: mock.Mock, - download_binary_file: mock.Mock, - async_download_files: mock.Mock, - mock_path_unlink: mock.Mock, - mock_match_checksum: mock.Mock, - mock_url_download: mock.Mock, - mock_vcs_download: mock.Mock, - mock_distributions: mock.Mock, + 
mock_check_metadata_in_sdist: mock.Mock, + mock_download_binary_file: mock.Mock, + mock_async_download_files: mock.Mock, + mock_unlink: mock.Mock, + mock_must_match_any_checksum: mock.Mock, + mock_download_url_package: mock.Mock, + mock_download_vcs_package: mock.Mock, + mock_process_package_distributions: mock.Mock, use_hashes: bool, trusted_hosts: list[str], allow_binary: bool, @@ -3358,7 +3356,10 @@ def test_download_dependencies( pip_deps = rooted_tmp_path.join_within_root("deps", "pip") - pypi_download = pip_deps.join_within_root("foo-1.0.tar.gz").path + sdist_download = pip_deps.join_within_root("foo-1.0.tar.gz").path + wheel_0_download = pip_deps.join_within_root("foo-1.0-cp35-many-linux.whl").path + wheel_1_download = pip_deps.join_within_root("foo-1.0-cp25-win32.whl").path + wheel_2_download = pip_deps.join_within_root("foo-1.0-any.whl").path vcs_download = pip_deps.join_within_root( "github.com", "spam", @@ -3386,146 +3387,186 @@ def test_download_dependencies( "requirement_file": str(req_file.file_path.subpath_from_root), } - source_package = pip.DistributionPackageInfo( - "foo", "1.0", "sdist", pypi_download, "", False + sdist_DPI = pip.DistributionPackageInfo( + "foo", + "1.0", + "sdist", + sdist_download, + "", + False, + pypi_checksums={ChecksumInfo("sha256", "abcdef")}, ) + sdist_d_i = sdist_DPI.download_info | { + "kind": "pypi", + "requirement_file": str(req_file.file_path.subpath_from_root), + "hash_verified": True, + } + pypi_downloads = [sdist_d_i] + wheel_downloads = [] - w1_path = pip_deps.join_within_root("foo-1.0-cp25-win32.whl").path - w2_path = pip_deps.join_within_root("foo-1.0-cp35-many-linux.whl").path - w3_path = pip_deps.join_within_root("foo-1.0-any.whl").path - - wheels = [] + wheels_DPI = [] if allow_binary: - wheels = [ - pip.DistributionPackageInfo( - "foo", - "1.0", - "wheel", - w1_path, - "", - False, - pypi_checksums={ChecksumInfo("sha256", "abcdef")}, - ), - pip.DistributionPackageInfo( + for wheel_path, checksums, hash_verified in zip( + [wheel_0_download, wheel_1_download, wheel_2_download], + [{ChecksumInfo("sha256", "fedcba")}, {ChecksumInfo("sha256", "abcdef")}, set()], + [False, True, False], + ): + dpi = pip.DistributionPackageInfo( "foo", "1.0", "wheel", - w2_path, + wheel_path, "", False, - pypi_checksums={ChecksumInfo("sha256", "fedcba")}, - ), - pip.DistributionPackageInfo("foo", "1.0", "wheel", w3_path, "", False), - ] + pypi_checksums=checksums, # type: ignore + ) + wheels_DPI.append(dpi) + wheel_downloads.append( + dpi.download_info + | { + "kind": "pypi", + "requirement_file": str(req_file.file_path.subpath_from_root), + "hash_verified": hash_verified, + } + ) - mock_distributions.return_value = source_package, wheels - mock_vcs_download.return_value = deepcopy(vcs_info) - mock_url_download.return_value = deepcopy(url_info) + pypi_downloads.extend(wheel_downloads) + mock_process_package_distributions.return_value = [sdist_DPI] + wheels_DPI + mock_download_vcs_package.return_value = deepcopy(vcs_info) + mock_download_url_package.return_value = deepcopy(url_info) - if use_hashes: - mock_match_checksum.side_effect = [ - # pypi_download - None, - # vcs_download - None, - # url_download - None, - # 1. wheel - checksums OK - None, - # 2. wheel - checksums NOK - PackageRejected("", solution=None), - # 3. 
wheel - no checksums to verify + if use_hashes and not allow_binary: + mock_must_match_any_checksum.side_effect = [ + None, # sdist_download + None, # vcs_download + None, # url_download + ] + elif use_hashes and allow_binary: + mock_must_match_any_checksum.side_effect = [ + None, # sdist_download + PackageRejected("", solution=None), # wheel_0_download - checksums NOK + None, # wheel_1_download - checksums OK + None, # vcs_download + None, # url_download + # wheel_2_download - no checksums to verify + ] + elif not allow_binary: + mock_must_match_any_checksum.side_effect = [ + None, # sdist_download + None, # url_download ] else: - mock_match_checksum.side_effect = [None, None, PackageRejected("", solution=None)] + mock_must_match_any_checksum.side_effect = [ + None, # sdist_download + PackageRejected("", solution=None), # wheel_0_download - checksums NOK + None, # wheel_1_download - checksums OK + None, # url_download + ] # # downloads = pip._download_dependencies(rooted_tmp_path, req_file, allow_binary) - assert downloads == [ - source_package.download_info - | { - "kind": "pypi", - "hash_verified": use_hashes, - "requirement_file": str(req_file.file_path.subpath_from_root), - }, + expected_downloads = pypi_downloads + [ vcs_info | {"kind": "vcs"}, url_info | {"kind": "url"}, ] + assert downloads == expected_downloads assert pip_deps.path.is_dir() # # - check_metadata_in_sdist.assert_called_once_with(source_package.path) - mock_distributions.assert_called_once_with(pypi_req, pip_deps, allow_binary) - mock_vcs_download.assert_called_once_with(vcs_req, pip_deps) - mock_url_download.assert_called_once_with(url_req, pip_deps, set(trusted_hosts)) + mock_check_metadata_in_sdist.assert_called_once_with(sdist_DPI.path) + mock_process_package_distributions.assert_called_once_with(pypi_req, pip_deps, allow_binary) + mock_download_vcs_package.assert_called_once_with(vcs_req, pip_deps) + mock_download_url_package.assert_called_once_with(url_req, pip_deps, set(trusted_hosts)) # # + verify_sdist_checksum_call = mock.call(sdist_download, {ChecksumInfo("sha256", "abcdef")}) + verify_wheel0_checksum_call = mock.call( + wheel_0_download, {ChecksumInfo("sha256", "fedcba")} + ) + verify_wheel1_checksum_call = mock.call( + wheel_1_download, {ChecksumInfo("sha256", "abcdef")} + ) verify_url_checksum_call = mock.call(url_download, [ChecksumInfo("sha256", "654321")]) - if use_hashes: + verify_vcs_checksum_call = mock.call(vcs_download, [ChecksumInfo("sha256", "123456")]) + + if use_hashes and not allow_binary: msg = "At least one dependency uses the --hash option, will require hashes" assert msg in caplog.text verify_checksums_calls = [ - mock.call(pypi_download, [ChecksumInfo("sha256", "abcdef")]), - mock.call(vcs_download, [ChecksumInfo("sha256", "123456")]), + verify_sdist_checksum_call, + verify_vcs_checksum_call, verify_url_checksum_call, ] - else: + elif use_hashes and allow_binary: + msg = "At least one dependency uses the --hash option, will require hashes" + assert msg in caplog.text + + verify_checksums_calls = [ + verify_sdist_checksum_call, + verify_wheel0_checksum_call, + verify_wheel1_checksum_call, + verify_vcs_checksum_call, + verify_url_checksum_call, + ] + elif not allow_binary: msg = "No hash options used, will not require hashes unless HTTP(S) dependencies are present." 
assert msg in caplog.text # Hashes for URL dependencies should be verified no matter what - verify_checksums_calls = [verify_url_checksum_call] - - if allow_binary: - verify_checksums_calls.extend( - [ - mock.call(w1_path, {ChecksumInfo("sha256", "abcdef")}), - mock.call(w2_path, {ChecksumInfo("sha256", "fedcba")}), - ] - ) + verify_checksums_calls = [verify_sdist_checksum_call, verify_url_checksum_call] + else: + verify_checksums_calls = [ + verify_sdist_checksum_call, + verify_wheel0_checksum_call, + verify_wheel1_checksum_call, + verify_url_checksum_call, + ] - mock_match_checksum.assert_has_calls(verify_checksums_calls) - assert mock_match_checksum.call_count == len(verify_checksums_calls) + mock_must_match_any_checksum.assert_has_calls(verify_checksums_calls) + assert mock_must_match_any_checksum.call_count == len(verify_checksums_calls) # # - assert f"Downloading {pypi_req.download_line}" in caplog.text + assert f"-- Processing requirement line '{pypi_req.download_line}'" in caplog.text assert ( - f"Successfully downloaded {pypi_req.download_line} to deps/pip/foo-1.0.tar.gz" + f"Successfully processed '{pypi_req.download_line}' in path 'deps/pip/foo-1.0.tar.gz'" ) in caplog.text - assert f"Downloading {vcs_req.download_line}" in caplog.text + assert f"-- Processing requirement line '{vcs_req.download_line}'" in caplog.text assert ( - f"Successfully downloaded {vcs_req.download_line} to deps/pip/github.com/spam/eggs/" - f"eggs-external-gitcommit-{GIT_REF}.tar.gz" + f"Successfully processed '{vcs_req.download_line}' in path 'deps/pip/github.com/spam/eggs/" + f"eggs-external-gitcommit-{GIT_REF}.tar.gz'" ) in caplog.text - assert f"Downloading {url_req.download_line}" in caplog.text + assert f"-- Processing requirement line '{url_req.download_line}'" in caplog.text assert ( - f"Successfully downloaded {url_req.download_line} to deps/pip/external-bar/" - f"bar-external-sha256-654321.tar.gz" + f"Successfully processed '{url_req.download_line}' in path 'deps/pip/external-bar/" + f"bar-external-sha256-654321.tar.gz'" ) in caplog.text # # if allow_binary: - assert f"Downloading {len(wheels)} wheel(s) ..." 
in caplog.text - # wheel #2 does not match any checksums - assert f"The {w2_path.name} is removed from the output directory" in caplog.text + assert f"-- Processing requirement line '{pypi_req.download_line}'" in caplog.text + # wheel 0 does not match any checksums + assert ( + f"Download '{wheel_0_download.name}' was removed from the output directory" + in caplog.text + ) # @mock.patch("cachi2.core.package_managers.pip._process_package_distributions") - @mock.patch("cachi2.core.package_managers.pip.download_binary_file") + @mock.patch("cachi2.core.package_managers.pip.async_download_files") @mock.patch("cachi2.core.package_managers.pip._check_metadata_in_sdist") def test_download_from_requirement_files( self, - check_metadata_in_sdist: mock.Mock, - download_binary_file: mock.Mock, - mock_distributions: mock.Mock, + _check_metadata_in_sdist: mock.Mock, + async_download_files: mock.Mock, + _process_package_distributions: mock.Mock, rooted_tmp_path: RootedPath, ) -> None: """Test downloading dependencies from a requirement file list.""" @@ -3546,7 +3587,7 @@ def test_download_from_requirement_files( "bar", "0.0.1", "sdist", pypi_download2, "", False ) - mock_distributions.side_effect = [(pypi_package1, []), (pypi_package2, [])] + _process_package_distributions.side_effect = [[pypi_package1], [pypi_package2]] downloads = pip._download_from_requirement_files(rooted_tmp_path, [req_file1, req_file2]) assert downloads == [ @@ -3563,7 +3604,7 @@ def test_download_from_requirement_files( "requirement_file": str(req_file2.subpath_from_root), }, ] - check_metadata_in_sdist.assert_has_calls( + _check_metadata_in_sdist.assert_has_calls( [mock.call(pypi_package1.path), mock.call(pypi_package2.path)], any_order=True )
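
Reviewer note (illustrative sketch, not part of the patch): the central API change
here is that `_process_package_distributions()` now returns one flat
`list[DistributionPackageInfo]` -- the best matching sdist (if any) followed by all
acceptable wheels -- instead of a `tuple[Optional[DPI], list[DPI]]`. The `DPI`
class below is a simplified, hypothetical stand-in for `DistributionPackageInfo`;
only the fields exercised here are assumed.

    from dataclasses import dataclass
    from pathlib import Path


    @dataclass
    class DPI:
        """Simplified, hypothetical stand-in for DistributionPackageInfo."""

        package_type: str  # "sdist" or "wheel"
        path: Path
        url: str


    def consume(artifacts: list[DPI]) -> None:
        # Before: callers unpacked a (sdist, wheels) tuple and special-cased
        # sdist being None. After: every artifact is handled uniformly, as
        # _process_pypi_req() does when it builds its url-to-path download map.
        to_fetch = {dpi.url: dpi.path for dpi in artifacts if not dpi.path.exists()}
        sdists = [dpi for dpi in artifacts if dpi.package_type == "sdist"]
        print(f"{len(to_fetch)} artifact(s) to fetch, {len(sdists)} sdist(s) found")


    consume(
        [
            DPI("sdist", Path("foo-1.0.tar.gz"), "https://example.org/foo-1.0.tar.gz"),
            DPI("wheel", Path("foo-1.0-py3-none-any.whl"), "https://example.org/foo-1.0-py3-none-any.whl"),
        ]
    )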