diff --git a/cachi2/core/package_managers/pip.py b/cachi2/core/package_managers/pip.py
index b96fac791..1f5e2776b 100644
--- a/cachi2/core/package_managers/pip.py
+++ b/cachi2/core/package_managers/pip.py
@@ -122,7 +122,7 @@ def _determine_checksums_to_verify(self) -> set[ChecksumInfo]:
log.debug("%s: %s", self.path.name, msg)
return checksums
- def should_download_wheel(self) -> bool:
+ def should_download(self) -> bool:
"""Determine if this artifact should be downloaded.
If the user specified any checksums, but they do not match with those
@@ -1453,8 +1453,110 @@ def _split_hashes_from_options(cls, options: list[str]) -> tuple[list[str], list
return hashes, reduced_options
+def _process_req(
+ req: PipRequirement,
+ requirements_file: PipRequirementsFile,
+ require_hashes: bool,
+ pip_deps_dir: RootedPath,
+ download_info: dict[str, Any],
+ dpi: Optional[DistributionPackageInfo] = None,
+) -> dict[str, Any]:
+ download_info["kind"] = req.kind
+ download_info["requirement_file"] = str(requirements_file.file_path.subpath_from_root)
+ download_info["hash_verified"] = False
+
+ def _hash_verify(path: Path, checksum_info: Iterable[ChecksumInfo]) -> bool:
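+ """Verify a downloaded file against any of the given checksums.
+
+ On mismatch, remove the file so that a rejected artifact never remains
+ in the output directory, and return False instead of raising.
+ """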
+ verified: bool = False
+ try:
+ must_match_any_checksum(path, checksum_info)
+ verified = True
+ except PackageRejected:
+ path.unlink()
+ log.warning("Download '%s' was removed from the output directory", path.name)
+ return verified
+
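+ # PyPI artifacts arrive with a DistributionPackageInfo carrying the checksums
+ # to verify; VCS and URL requirements have no DPI, so their hashes come from
+ # the requirements file (or the cachito_hash URL qualifier) instead.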
+ if dpi:
+ if dpi.should_verify_checksums():
+ download_info["hash_verified"] = _hash_verify(dpi.path, dpi.checksums_to_verify)
+
+ if dpi.package_type == "sdist":
+ _check_metadata_in_sdist(dpi.path)
+
+ else:
+ if require_hashes or req.kind == "url":
+ hashes = req.hashes or [req.qualifiers.get("cachito_hash", "")]
+ download_info["hash_verified"] = _hash_verify(
+ download_info["path"], list(map(_to_checksum_info, hashes))
+ )
+
+ log.debug(
+ "Successfully processed '%s' in path '%s'",
+ req.download_line,
+ download_info["path"].relative_to(pip_deps_dir.root),
+ )
+
+ return download_info
+
+
+def _process_pypi_req(
+ req: PipRequirement,
+ requirements_file: PipRequirementsFile,
+ require_hashes: bool,
+ pip_deps_dir: RootedPath,
+ allow_binary: bool,
+) -> list[dict[str, Any]]:
+ download_infos: list[dict[str, Any]] = []
+
+ artifacts: list[DistributionPackageInfo] = _process_package_distributions(
+ req, pip_deps_dir, allow_binary
+ )
+
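+ # Fetch only the artifacts that are not already present locally; the
+ # downloads run concurrently, capped by the configured concurrency limit.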
+ files: dict[str, Union[str, PathLike[str]]] = {
+ dpi.url: dpi.path for dpi in artifacts if not dpi.path.exists()
+ }
+ asyncio.run(async_download_files(files, get_config().concurrency_limit))
+
+ for artifact in artifacts:
+ download_infos.append(
+ _process_req(
+ req,
+ requirements_file,
+ require_hashes,
+ pip_deps_dir,
+ artifact.download_info,
+ dpi=artifact,
+ )
+ )
+
+ return download_infos
+
+
+def _process_vcs_req(
+ req: PipRequirement, pip_deps_dir: RootedPath, **kwargs: Any
+) -> dict[str, Any]:
+ return _process_req(
+ req,
+ pip_deps_dir=pip_deps_dir,
+ download_info=_download_vcs_package(req, pip_deps_dir),
+ **kwargs,
+ )
+
+
+def _process_url_req(
+ req: PipRequirement, pip_deps_dir: RootedPath, trusted_hosts: set[str], **kwargs: Any
+) -> dict[str, Any]:
+ return _process_req(
+ req,
+ pip_deps_dir=pip_deps_dir,
+ download_info=_download_url_package(req, pip_deps_dir, trusted_hosts),
+ **kwargs,
+ )
+
+
def _download_dependencies(
- output_dir: RootedPath, requirements_file: PipRequirementsFile, allow_binary: bool = False
+ output_dir: RootedPath,
+ requirements_file: PipRequirementsFile,
+ allow_binary: bool = False,
) -> list[dict[str, Any]]:
"""
Download artifacts of all dependency packages in a requirements.txt file.
@@ -1466,8 +1568,9 @@ def _download_dependencies(
(and more based on kind, see _download_*_package functions for more details)
:rtype: list[dict]
"""
- options = _process_options(requirements_file.options)
+ options: dict[str, Any] = _process_options(requirements_file.options)
trusted_hosts = set(options["trusted_hosts"])
+ processed: list[dict[str, Any]] = []
if options["require_hashes"]:
log.info("Global --require-hashes option used, will require hashes")
@@ -1484,77 +1587,44 @@ def _download_dependencies(
_validate_requirements(requirements_file.requirements)
_validate_provided_hashes(requirements_file.requirements, require_hashes)
- pip_deps_dir = output_dir.join_within_root("deps", "pip")
+ pip_deps_dir: RootedPath = output_dir.join_within_root("deps", "pip")
pip_deps_dir.path.mkdir(parents=True, exist_ok=True)
- downloaded: list[dict[str, Any]] = []
- to_download: list[DistributionPackageInfo] = []
-
for req in requirements_file.requirements:
- log.info("Downloading %s", req.download_line)
-
+ log.info("-- Processing requirement line '%s'", req.download_line)
if req.kind == "pypi":
- source, wheels = _process_package_distributions(req, pip_deps_dir, allow_binary)
- if allow_binary:
- # check if artifact already exists locally
- to_download.extend(w for w in wheels if not w.path.exists())
-
- if source is None:
- # at least one wheel exists -> report in the SBOM
- downloaded.append(
- {
- "package": req.package,
- "version": req.version_specs[0][1],
- "kind": req.kind,
- "hash_verified": require_hashes,
- "requirement_file": str(requirements_file.file_path.subpath_from_root),
- }
- )
- continue
-
- download_binary_file(source.url, source.path, auth=None)
- _check_metadata_in_sdist(source.path)
- download_info = source.download_info
-
+ download_infos: list[dict[str, Any]] = _process_pypi_req(
+ req,
+ requirements_file=requirements_file,
+ require_hashes=require_hashes,
+ pip_deps_dir=pip_deps_dir,
+ allow_binary=allow_binary,
+ )
+ processed.extend(download_infos)
elif req.kind == "vcs":
- download_info = _download_vcs_package(req, pip_deps_dir)
+ download_info = _process_vcs_req(
+ req,
+ requirements_file=requirements_file,
+ require_hashes=require_hashes,
+ pip_deps_dir=pip_deps_dir,
+ )
+ processed.append(download_info)
elif req.kind == "url":
- download_info = _download_url_package(req, pip_deps_dir, trusted_hosts)
+ download_info = _process_url_req(
+ req,
+ requirements_file=requirements_file,
+ require_hashes=require_hashes,
+ pip_deps_dir=pip_deps_dir,
+ trusted_hosts=trusted_hosts,
+ )
+ processed.append(download_info)
else:
# Should not happen
- raise RuntimeError(f"Unexpected requirement kind: {req.kind!r}")
-
- log.info(
- "Successfully downloaded %s to %s",
- req.download_line,
- download_info["path"].relative_to(output_dir),
- )
-
- if require_hashes or req.kind == "url":
- hashes = req.hashes or [req.qualifiers["cachito_hash"]]
- must_match_any_checksum(download_info["path"], list(map(_to_checksum_info, hashes)))
- download_info["hash_verified"] = True
- else:
- download_info["hash_verified"] = False
-
- download_info["kind"] = req.kind
- download_info["requirement_file"] = str(requirements_file.file_path.subpath_from_root)
- downloaded.append(download_info)
-
- if allow_binary:
- log.info("Downloading %d wheel(s) ...", len(to_download))
- files: dict[str, Union[str, PathLike[str]]] = {pkg.url: pkg.path for pkg in to_download}
- asyncio.run(async_download_files(files, get_config().concurrency_limit))
+ raise RuntimeError(f"Unexpected requirement kind: {req.kind!r}")
- for pkg in to_download:
- try:
- if pkg.should_verify_checksums():
- must_match_any_checksum(pkg.path, pkg.checksums_to_verify)
- except PackageRejected:
- pkg.path.unlink()
- log.warning("Download '%s' was removed from the output directory", pkg.path.name)
+ log.info("-- Finished processing requirement line '%s'\n", req.download_line)
- return downloaded
+ return processed
def _process_options(options: list[str]) -> dict[str, Any]:
@@ -1741,51 +1811,62 @@ def _validate_provided_hashes(requirements: list[PipRequirement], require_hashes
def _process_package_distributions(
requirement: PipRequirement, pip_deps_dir: RootedPath, allow_binary: bool = False
-) -> tuple[Optional[DistributionPackageInfo], list[DistributionPackageInfo]]:
+) -> list[DistributionPackageInfo]:
"""
- Return a DistributionPackageInfo object | a list of DPI objects, for the provided pip package.
+ Return a list of DistributionPackageInfo (DPI) objects for the provided pip package.
Scrape the package's PyPI page and generate a list of all available
- artifacts.
- Filter by version and allowed artifact type.
- Filter to find the best matching sdist artifact.
- Process wheel artifacts.
+ artifacts. Filter by version and allowed artifact type. Filter to find the
+ best matching sdist artifact. Process wheel artifacts.
+
+ A note on nomenclature (to address a common misconception):
+
+ To follow Python packaging terminology strictly, we avoid the overloaded
+ term "source" - rather than "source" vs. "wheel", we speak of "sdist" vs.
+ "wheel":
+ _sdist_ - "source distribution": a tarball (usually) of a project's entire repo
+ _wheel_ - "built distribution": a stripped-down version of an sdist, containing
+ **only** the Python modules and application data needed to install and run
+ the application (note that it does not need to, and commonly won't, include
+ compiled Python bytecode modules, i.e. *.pyc files)
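+
+ As an illustration, for package foo 1.0 the sdist is typically named
+ "foo-1.0.tar.gz", while a wheel follows the built distribution naming
+ scheme, e.g. "foo-1.0-py3-none-any.whl".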
:param requirement: which pip package to process
:param str pip_deps_dir:
:param bool allow_binary: process wheels?
- :return: a single DistributionPackageInfo, or a list of DPI
- :rtype: DistributionPackageInfo
+ :return: a list of DPI objects
+ :rtype: list[DistributionPackageInfo]
"""
+ allowed_distros = ["sdist", "wheel"] if allow_binary else ["sdist"]
+ client = pypi_simple.PyPISimple()
+ processed_dpis: list[DistributionPackageInfo] = []
name = requirement.package
version = requirement.version_specs[0][1]
normalized_version = canonicalize_version(version)
+ sdists: list[DistributionPackageInfo] = []
+ user_checksums = set(map(_to_checksum_info, requirement.hashes))
+ wheels: list[DistributionPackageInfo] = []
- client = pypi_simple.PyPISimple()
try:
timeout = get_config().requests_timeout
project_page = client.get_project_page(name, timeout)
- packages = project_page.packages
+ packages: list[pypi_simple.DistributionPackage] = project_page.packages
except (requests.RequestException, pypi_simple.NoSuchProjectError) as e:
raise FetchError(f"PyPI query failed: {e}")
- allowed_distros = ["sdist", "wheel"] if allow_binary else ["sdist"]
- filtered_packages = filter(
- lambda x: x.version is not None
- and canonicalize_version(x.version) == normalized_version
- and x.package_type is not None
- and x.package_type in allowed_distros,
- packages,
- )
-
- sdists: list[DistributionPackageInfo] = []
- wheels: list[DistributionPackageInfo] = []
+ def _is_valid(pkg: pypi_simple.DistributionPackage) -> bool:
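+ """Keep only artifacts matching the requested version and an allowed distribution type."""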
+ return (
+ pkg.version is not None
+ and canonicalize_version(pkg.version) == normalized_version
+ and pkg.package_type is not None
+ and pkg.package_type in allowed_distros
+ )
- user_checksums = set(map(_to_checksum_info, requirement.hashes))
+ for package in packages:
+ if not _is_valid(package):
+ continue
- for package in filtered_packages:
- pypi_checksums = {
+ pypi_checksums: set[ChecksumInfo] = {
ChecksumInfo(algorithm, digest) for algorithm, digest in package.digests.items()
}
@@ -1800,15 +1881,15 @@ def _process_package_distributions(
user_checksums,
)
- if dpi.package_type == "sdist":
- sdists.append(dpi)
- else:
- if dpi.should_download_wheel():
- wheels.append(dpi)
+ if dpi.should_download():
+ if dpi.package_type == "sdist":
+ sdists.append(dpi)
else:
- log.info("Filtering out %s due to checksum mismatch", package.filename)
+ wheels.append(dpi)
+ else:
+ log.info("Filtering out %s due to checksum mismatch", package.filename)
- if len(sdists) != 0:
+ if sdists:
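+ # Pick the single best-matching sdist; suitable wheels, by contrast, are
+ # all kept (see processed_dpis.extend(wheels) below).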
best_sdist = max(sdists, key=_sdist_preference)
if best_sdist.is_yanked:
raise PackageRejected(
@@ -1820,9 +1901,10 @@ def _process_package_distributions(
"Otherwise, you may need to pin to the previous version."
),
)
+ processed_dpis.append(best_sdist)
else:
log.warning("No sdist found for package %s==%s", name, version)
- best_sdist = None
if len(wheels) == 0:
if allow_binary:
@@ -1840,14 +1922,15 @@ def _process_package_distributions(
"Alternatively, allow the use of wheels."
)
docs = PIP_NO_SDIST_DOC
-
raise PackageRejected(
f"No distributions found for package {name}=={version}",
solution=solution,
docs=docs,
)
- return best_sdist, wheels
+ processed_dpis.extend(wheels)
+
+ return processed_dpis
def _sdist_preference(sdist_pkg: DistributionPackageInfo) -> tuple[int, int]:
diff --git a/tests/integration/test_data/pip_multiple/bom.json b/tests/integration/test_data/pip_multiple/bom.json
index 44afc8e62..bf32615b9 100644
--- a/tests/integration/test_data/pip_multiple/bom.json
+++ b/tests/integration/test_data/pip_multiple/bom.json
@@ -7,10 +7,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/aiowsgi@0.7",
@@ -49,10 +45,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/async-timeout@4.0.3",
@@ -65,10 +57,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/blinker@1.6.2",
@@ -81,10 +69,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/certifi@2023.7.22",
@@ -97,10 +81,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/cffi@1.15.1",
@@ -113,10 +93,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/charset-normalizer@3.2.0",
@@ -129,14 +105,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "third_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/click@8.1.7",
@@ -149,10 +117,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/cryptography@41.0.3",
@@ -165,10 +129,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/dnspython@2.4.2",
@@ -181,10 +141,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/ecdsa@0.18.0",
@@ -209,10 +165,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/flask-cors@4.0.0",
@@ -225,10 +177,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/flask@2.3.3",
@@ -241,10 +189,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/future@0.18.3",
@@ -257,10 +201,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/futures@3.0.5",
@@ -273,10 +213,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/gevent@23.7.0",
@@ -289,10 +225,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/greenlet@2.0.2",
@@ -305,10 +237,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/gunicorn@21.2.0",
@@ -321,10 +249,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/idna@3.4",
@@ -337,10 +261,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/importlib-metadata@6.8.0",
@@ -353,10 +273,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/itsdangerous@2.1.2",
@@ -369,10 +285,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/jinja2@3.1.2",
@@ -385,10 +297,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/jsonpatch@1.33",
@@ -401,10 +309,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/jsonpointer@2.4",
@@ -417,10 +321,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/markupsafe@2.1.3",
@@ -433,10 +333,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/packaging@23.1",
@@ -449,10 +345,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/pathspec@0.11.2",
@@ -465,10 +357,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/pycparser@2.21",
@@ -481,10 +369,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/pyopenssl@23.2.0",
@@ -497,10 +381,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/python-etcd@0.4.5",
@@ -513,10 +393,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/pyyaml@6.0.1",
@@ -529,10 +405,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/redis@5.0.0",
@@ -545,10 +417,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/requests@2.31.0",
@@ -573,10 +441,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/semantic-version@2.10.0",
@@ -589,10 +453,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/six@1.16.0",
@@ -605,10 +465,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/tabulate@0.9.0",
@@ -621,10 +477,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/termcolor@2.3.0",
@@ -649,10 +501,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/urllib3@1.21.1",
@@ -665,10 +513,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/waitress@2.1.2",
@@ -681,10 +525,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/webob@1.8.7",
@@ -697,10 +537,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/werkzeug@2.3.7",
@@ -713,10 +549,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/zipp@3.16.2",
@@ -729,10 +561,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/zope-event@5.0",
@@ -745,10 +573,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "second_pkg/requirements.txt"
}
],
"purl": "pkg:pypi/zope-interface@6.0",
diff --git a/tests/integration/test_data/pip_with_deps/bom.json b/tests/integration/test_data/pip_with_deps/bom.json
index 689635b25..98cfb5dce 100644
--- a/tests/integration/test_data/pip_with_deps/bom.json
+++ b/tests/integration/test_data/pip_with_deps/bom.json
@@ -7,10 +7,6 @@
{
"name": "cachi2:found_by",
"value": "cachi2"
- },
- {
- "name": "cachi2:missing_hash:in_file",
- "value": "requirements.txt"
}
],
"purl": "pkg:pypi/aiowsgi@0.7",
diff --git a/tests/unit/package_managers/test_pip.py b/tests/unit/package_managers/test_pip.py
index 6fe77ed8b..e1a61b60b 100644
--- a/tests/unit/package_managers/test_pip.py
+++ b/tests/unit/package_managers/test_pip.py
@@ -2711,11 +2711,11 @@ def test_process_existing_package_without_source_distributions(
None,
None,
)
- source, wheels = pip._process_package_distributions(
+ artifacts = pip._process_package_distributions(
mock_requirement, rooted_tmp_path, allow_binary=True
)
- assert source is None
- assert len(wheels) == 2
+ assert all(artifact.package_type == "wheel" for artifact in artifacts)
+ assert len(artifacts) == 2
assert f"No sdist found for package {package_name}=={version}" in caplog.text
@pytest.mark.parametrize("allow_binary", (True, False))
@@ -2749,10 +2749,8 @@ def test_process_existing_package_without_any_distributions(
)
else:
assert str(exc_info.value.solution) == (
- "It seems that this version does not exist or isn't published as an"
- " sdist.\n"
- "Try to specify the dependency directly via a URL instead, for example,"
- " the tarball for a GitHub release.\n"
+ "It seems that this version does not exist or isn't published as an sdist.\n"
+ "Try to specify the dependency directly via a URL instead, for example, the tarball for a GitHub release.\n"
"Alternatively, allow the use of wheels."
)
@@ -2817,7 +2815,7 @@ def test_process_package_distributions_with_checksums(
None,
None,
)
- _, wheels = pip._process_package_distributions(
+ artifacts = pip._process_package_distributions(
mock_requirement, rooted_tmp_path, allow_binary=True
)
@@ -2826,13 +2824,13 @@ def test_process_package_distributions_with_checksums(
f"{package_name}: using intersection of user specified and PyPI reported checksums"
in caplog.text
)
- assert wheels[0].checksums_to_verify == set(
+ assert artifacts[1].checksums_to_verify == set(
[ChecksumInfo("sha128", "abcdef"), ChecksumInfo("sha256", "abcdef")]
)
elif use_user_hashes and not use_pypi_digests:
assert f"{package_name}: using user specified checksums" in caplog.text
- assert wheels[0].checksums_to_verify == set(
+ assert artifacts[1].checksums_to_verify == set(
[
ChecksumInfo("sha128", "abcdef"),
ChecksumInfo("sha256", "abcdef"),
@@ -2842,7 +2840,7 @@ def test_process_package_distributions_with_checksums(
elif use_pypi_digests and not use_user_hashes:
assert f"{package_name}: using PyPI reported checksums" in caplog.text
- assert wheels[0].checksums_to_verify == set(
+ assert artifacts[1].checksums_to_verify == set(
[
ChecksumInfo("sha128", "abcdef"),
ChecksumInfo("sha256", "abcdef"),
@@ -2855,7 +2853,7 @@ def test_process_package_distributions_with_checksums(
f"{package_name}: no checksums reported by PyPI or specified by the user"
in caplog.text
)
- assert wheels[0].checksums_to_verify == set()
+ assert artifacts[1].checksums_to_verify == set()
@mock.patch.object(pypi_simple.PyPISimple, "get_project_page")
def test_process_package_distributions_with_different_checksums(
@@ -2882,11 +2880,11 @@ def test_process_package_distributions_with_different_checksums(
None,
)
- _, wheels = pip._process_package_distributions(
+ artifacts = pip._process_package_distributions(
mock_requirement, rooted_tmp_path, allow_binary=True
)
- assert len(wheels) == 0
+ assert len(artifacts) == 1
assert f"Filtering out {package_name} due to checksum mismatch" in caplog.text
@pytest.mark.parametrize(
@@ -2937,10 +2935,10 @@ def test_process_package_distributions_noncanonical_version(
None,
)
- source, wheels = pip._process_package_distributions(mock_requirement, rooted_tmp_path)
- assert source is not None
- assert source.version == requested_version
- assert all(w.version == requested_version for w in wheels)
+ artifacts = pip._process_package_distributions(mock_requirement, rooted_tmp_path)
+ assert artifacts[0].package_type == "sdist"
+ assert artifacts[0].version == requested_version
+ assert all(w.version == requested_version for w in artifacts[1:])
def test_sdist_sorting(self) -> None:
"""Test that sdist preference key can be used for sorting in the expected order."""
@@ -3305,14 +3303,14 @@ def test_malformed_hash(self, requirement_kind: str, hash_in_url: bool) -> None:
@mock.patch("cachi2.core.package_managers.pip._check_metadata_in_sdist")
def test_download_dependencies(
self,
- check_metadata_in_sdist: mock.Mock,
- download_binary_file: mock.Mock,
- async_download_files: mock.Mock,
- mock_path_unlink: mock.Mock,
- mock_match_checksum: mock.Mock,
- mock_url_download: mock.Mock,
- mock_vcs_download: mock.Mock,
- mock_distributions: mock.Mock,
+ mock_check_metadata_in_sdist: mock.Mock,
+ mock_download_binary_file: mock.Mock,
+ mock_async_download_files: mock.Mock,
+ mock_unlink: mock.Mock,
+ mock_must_match_any_checksum: mock.Mock,
+ mock_download_url_package: mock.Mock,
+ mock_download_vcs_package: mock.Mock,
+ mock_process_package_distributions: mock.Mock,
use_hashes: bool,
trusted_hosts: list[str],
allow_binary: bool,
@@ -3358,7 +3356,10 @@ def test_download_dependencies(
pip_deps = rooted_tmp_path.join_within_root("deps", "pip")
- pypi_download = pip_deps.join_within_root("foo-1.0.tar.gz").path
+ sdist_download = pip_deps.join_within_root("foo-1.0.tar.gz").path
+ wheel_0_download = pip_deps.join_within_root("foo-1.0-cp35-many-linux.whl").path
+ wheel_1_download = pip_deps.join_within_root("foo-1.0-cp25-win32.whl").path
+ wheel_2_download = pip_deps.join_within_root("foo-1.0-any.whl").path
vcs_download = pip_deps.join_within_root(
"github.com",
"spam",
@@ -3386,146 +3387,186 @@ def test_download_dependencies(
"requirement_file": str(req_file.file_path.subpath_from_root),
}
- source_package = pip.DistributionPackageInfo(
- "foo", "1.0", "sdist", pypi_download, "", False
+ sdist_dpi = pip.DistributionPackageInfo(
+ "foo",
+ "1.0",
+ "sdist",
+ sdist_download,
+ "",
+ False,
+ pypi_checksums={ChecksumInfo("sha256", "abcdef")},
)
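+ # Expected record: the DPI's download info merged with the bookkeeping
+ # fields that _process_req adds for every requirement.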
+ sdist_download_info = sdist_dpi.download_info | {
+ "kind": "pypi",
+ "requirement_file": str(req_file.file_path.subpath_from_root),
+ "hash_verified": True,
+ }
+ pypi_downloads = [sdist_download_info]
+ wheel_downloads = []
- w1_path = pip_deps.join_within_root("foo-1.0-cp25-win32.whl").path
- w2_path = pip_deps.join_within_root("foo-1.0-cp35-many-linux.whl").path
- w3_path = pip_deps.join_within_root("foo-1.0-any.whl").path
-
- wheels = []
+ wheel_dpis = []
if allow_binary:
- wheels = [
- pip.DistributionPackageInfo(
- "foo",
- "1.0",
- "wheel",
- w1_path,
- "",
- False,
- pypi_checksums={ChecksumInfo("sha256", "abcdef")},
- ),
- pip.DistributionPackageInfo(
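+ # Three wheel scenarios are exercised below: a checksum mismatch (wheel_0,
+ # removed from the output), a checksum match (wheel_1, hash verified) and
+ # no checksums at all (wheel_2, kept but never verified).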
+ for wheel_path, checksums, hash_verified in zip(
+ [wheel_0_download, wheel_1_download, wheel_2_download],
+ [{ChecksumInfo("sha256", "fedcba")}, {ChecksumInfo("sha256", "abcdef")}, set()],
+ [False, True, False],
+ ):
+ dpi = pip.DistributionPackageInfo(
"foo",
"1.0",
"wheel",
- w2_path,
+ wheel_path,
"",
False,
- pypi_checksums={ChecksumInfo("sha256", "fedcba")},
- ),
- pip.DistributionPackageInfo("foo", "1.0", "wheel", w3_path, "", False),
- ]
+ pypi_checksums=checksums, # type: ignore
+ )
+ wheel_dpis.append(dpi)
+ wheel_downloads.append(
+ dpi.download_info
+ | {
+ "kind": "pypi",
+ "requirement_file": str(req_file.file_path.subpath_from_root),
+ "hash_verified": hash_verified,
+ }
+ )
- mock_distributions.return_value = source_package, wheels
- mock_vcs_download.return_value = deepcopy(vcs_info)
- mock_url_download.return_value = deepcopy(url_info)
+ pypi_downloads.extend(wheel_downloads)
+ mock_process_package_distributions.return_value = [sdist_dpi] + wheel_dpis
+ mock_download_vcs_package.return_value = deepcopy(vcs_info)
+ mock_download_url_package.return_value = deepcopy(url_info)
- if use_hashes:
- mock_match_checksum.side_effect = [
- # pypi_download
- None,
- # vcs_download
- None,
- # url_download
- None,
- # 1. wheel - checksums OK
- None,
- # 2. wheel - checksums NOK
- PackageRejected("", solution=None),
- # 3. wheel - no checksums to verify
+ if use_hashes and not allow_binary:
+ mock_must_match_any_checksum.side_effect = [
+ None, # sdist_download
+ None, # vcs_download
+ None, # url_download
+ ]
+ elif use_hashes and allow_binary:
+ mock_must_match_any_checksum.side_effect = [
+ None, # sdist_download
+ PackageRejected("", solution=None), # wheel_0_download - checksums NOK
+ None, # wheel_1_download - checksums OK
+ None, # vcs_download
+ None, # url_download
+ # wheel_2_download - no checksums to verify
+ ]
+ elif not allow_binary:
+ mock_must_match_any_checksum.side_effect = [
+ None, # sdist_download
+ None, # url_download
]
else:
- mock_match_checksum.side_effect = [None, None, PackageRejected("", solution=None)]
+ mock_must_match_any_checksum.side_effect = [
+ None, # sdist_download
+ PackageRejected("", solution=None), # wheel_0_download - checksums NOK
+ None, # wheel_1_download - checksums OK
+ None, # url_download
+ ]
#
#
downloads = pip._download_dependencies(rooted_tmp_path, req_file, allow_binary)
- assert downloads == [
- source_package.download_info
- | {
- "kind": "pypi",
- "hash_verified": use_hashes,
- "requirement_file": str(req_file.file_path.subpath_from_root),
- },
+ expected_downloads = pypi_downloads + [
vcs_info | {"kind": "vcs"},
url_info | {"kind": "url"},
]
+ assert downloads == expected_downloads
assert pip_deps.path.is_dir()
#
#
- check_metadata_in_sdist.assert_called_once_with(source_package.path)
- mock_distributions.assert_called_once_with(pypi_req, pip_deps, allow_binary)
- mock_vcs_download.assert_called_once_with(vcs_req, pip_deps)
- mock_url_download.assert_called_once_with(url_req, pip_deps, set(trusted_hosts))
+ mock_check_metadata_in_sdist.assert_called_once_with(sdist_dpi.path)
+ mock_process_package_distributions.assert_called_once_with(pypi_req, pip_deps, allow_binary)
+ mock_download_vcs_package.assert_called_once_with(vcs_req, pip_deps)
+ mock_download_url_package.assert_called_once_with(url_req, pip_deps, set(trusted_hosts))
#
#
+ verify_sdist_checksum_call = mock.call(sdist_download, {ChecksumInfo("sha256", "abcdef")})
+ verify_wheel0_checksum_call = mock.call(
+ wheel_0_download, {ChecksumInfo("sha256", "fedcba")}
+ )
+ verify_wheel1_checksum_call = mock.call(
+ wheel_1_download, {ChecksumInfo("sha256", "abcdef")}
+ )
verify_url_checksum_call = mock.call(url_download, [ChecksumInfo("sha256", "654321")])
- if use_hashes:
+ verify_vcs_checksum_call = mock.call(vcs_download, [ChecksumInfo("sha256", "123456")])
+
+ if use_hashes and not allow_binary:
msg = "At least one dependency uses the --hash option, will require hashes"
assert msg in caplog.text
verify_checksums_calls = [
- mock.call(pypi_download, [ChecksumInfo("sha256", "abcdef")]),
- mock.call(vcs_download, [ChecksumInfo("sha256", "123456")]),
+ verify_sdist_checksum_call,
+ verify_vcs_checksum_call,
verify_url_checksum_call,
]
- else:
+ elif use_hashes and allow_binary:
+ msg = "At least one dependency uses the --hash option, will require hashes"
+ assert msg in caplog.text
+
+ verify_checksums_calls = [
+ verify_sdist_checksum_call,
+ verify_wheel0_checksum_call,
+ verify_wheel1_checksum_call,
+ verify_vcs_checksum_call,
+ verify_url_checksum_call,
+ ]
+ elif not allow_binary:
msg = "No hash options used, will not require hashes unless HTTP(S) dependencies are present."
assert msg in caplog.text
# Hashes for URL dependencies should be verified no matter what
- verify_checksums_calls = [verify_url_checksum_call]
-
- if allow_binary:
- verify_checksums_calls.extend(
- [
- mock.call(w1_path, {ChecksumInfo("sha256", "abcdef")}),
- mock.call(w2_path, {ChecksumInfo("sha256", "fedcba")}),
- ]
- )
+ verify_checksums_calls = [verify_sdist_checksum_call, verify_url_checksum_call]
+ else:
+ verify_checksums_calls = [
+ verify_sdist_checksum_call,
+ verify_wheel0_checksum_call,
+ verify_wheel1_checksum_call,
+ verify_url_checksum_call,
+ ]
- mock_match_checksum.assert_has_calls(verify_checksums_calls)
- assert mock_match_checksum.call_count == len(verify_checksums_calls)
+ mock_must_match_any_checksum.assert_has_calls(verify_checksums_calls)
+ assert mock_must_match_any_checksum.call_count == len(verify_checksums_calls)
#
#
- assert f"Downloading {pypi_req.download_line}" in caplog.text
+ assert f"-- Processing requirement line '{pypi_req.download_line}'" in caplog.text
assert (
- f"Successfully downloaded {pypi_req.download_line} to deps/pip/foo-1.0.tar.gz"
+ f"Successfully processed '{pypi_req.download_line}' in path 'deps/pip/foo-1.0.tar.gz'"
) in caplog.text
- assert f"Downloading {vcs_req.download_line}" in caplog.text
+ assert f"-- Processing requirement line '{vcs_req.download_line}'" in caplog.text
assert (
- f"Successfully downloaded {vcs_req.download_line} to deps/pip/github.com/spam/eggs/"
- f"eggs-external-gitcommit-{GIT_REF}.tar.gz"
+ f"Successfully processed '{vcs_req.download_line}' in path 'deps/pip/github.com/spam/eggs/"
+ f"eggs-external-gitcommit-{GIT_REF}.tar.gz'"
) in caplog.text
- assert f"Downloading {url_req.download_line}" in caplog.text
+ assert f"-- Processing requirement line '{url_req.download_line}'" in caplog.text
assert (
- f"Successfully downloaded {url_req.download_line} to deps/pip/external-bar/"
- f"bar-external-sha256-654321.tar.gz"
+ f"Successfully processed '{url_req.download_line}' in path 'deps/pip/external-bar/"
+ f"bar-external-sha256-654321.tar.gz'"
) in caplog.text
#
#
if allow_binary:
- assert f"Downloading {len(wheels)} wheel(s) ..." in caplog.text
- # wheel #2 does not match any checksums
- assert f"The {w2_path.name} is removed from the output directory" in caplog.text
+ assert f"-- Processing requirement line '{pypi_req.download_line}'" in caplog.text
+ # wheel 0 does not match any checksums
+ assert (
+ f"Download '{wheel_0_download.name}' was removed from the output directory"
+ in caplog.text
+ )
#
@mock.patch("cachi2.core.package_managers.pip._process_package_distributions")
- @mock.patch("cachi2.core.package_managers.pip.download_binary_file")
+ @mock.patch("cachi2.core.package_managers.pip.async_download_files")
@mock.patch("cachi2.core.package_managers.pip._check_metadata_in_sdist")
def test_download_from_requirement_files(
self,
- check_metadata_in_sdist: mock.Mock,
- download_binary_file: mock.Mock,
- mock_distributions: mock.Mock,
+ mock_check_metadata_in_sdist: mock.Mock,
+ mock_async_download_files: mock.Mock,
+ mock_process_package_distributions: mock.Mock,
rooted_tmp_path: RootedPath,
) -> None:
"""Test downloading dependencies from a requirement file list."""
@@ -3546,7 +3587,7 @@ def test_download_from_requirement_files(
"bar", "0.0.1", "sdist", pypi_download2, "", False
)
- mock_distributions.side_effect = [(pypi_package1, []), (pypi_package2, [])]
+ mock_process_package_distributions.side_effect = [[pypi_package1], [pypi_package2]]
downloads = pip._download_from_requirement_files(rooted_tmp_path, [req_file1, req_file2])
assert downloads == [
@@ -3563,7 +3604,7 @@ def test_download_from_requirement_files(
"requirement_file": str(req_file2.subpath_from_root),
},
]
- check_metadata_in_sdist.assert_has_calls(
+ mock_check_metadata_in_sdist.assert_has_calls(
[mock.call(pypi_package1.path), mock.call(pypi_package2.path)], any_order=True
)