From d08369f666363042942cf408b8c180454cf884ee Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Thu, 3 Feb 2022 16:32:37 -0500 Subject: [PATCH] pip_api: initial support for hashed requirements (#126) * pip_api: initial support for hashed requirements * tests: more tests * pip_api, tests: blacken * tests: add a no-op test for --hash * pip_api, tests: enforce hash strictness * pip_api: add a strict_hashes kwarg to parse_requirements * README: update API line * tests: add non-strict asserts * pip_api: simplify strict hash handling * pip_api: make strict_hashes even stricter * README: improve docs * pip_api: forward Requirement and UnparsedRequirement * Update README.md Co-authored-by: Dustin Ingram * README: document all Requirement fields the same Co-authored-by: Dustin Ingram --- README.md | 9 ++- pip_api/__init__.py | 6 +- pip_api/_parse_requirements.py | 54 +++++++++++++-- tests/test_parse_requirements.py | 115 +++++++++++++++++++++++++++++++ 4 files changed, 176 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 7a2bb47..dcdc4b5 100644 --- a/README.md +++ b/README.md @@ -72,14 +72,19 @@ If the command you are trying to use is not compatible, `pip_api` will raise a > * `Distribution.editable` (`bool`): Whether the distribution is editable or not > Optionally takes a `local` parameter to filter out globally-installed packages -* `pip_api.parse_requirements(filename, options=None, include_invalid=False)` - > Takes a path to a filename of a Requirements file. Returns a mapping from package name to a [`packaging.requirements.Requirement`](https://packaging.pypa.io/en/latest/requirements/#packaging.requirements.Requirement) object with the following attributes: +* `pip_api.parse_requirements(filename, options=None, include_invalid=False, strict_hashes=False)` + > Takes a path to a filename of a Requirements file. Returns a mapping from package name to a `pip_api.Requirement` object (subclass of [`packaging.requirements.Requirement`](https://packaging.pypa.io/en/latest/requirements/#packaging.requirements.Requirement)) with the following attributes: > * `Requirement.name` (`string`): The name of the requirement. > * `Requirement.extras` (`set`): A set of extras that the requirement specifies. > * `Requirement.specifier` ([`packaging.specifiers.SpecifierSet`](https://packaging.pypa.io/en/latest/specifiers/#packaging.specifiers.SpecifierSet)): A `SpecifierSet` of the version specified by the requirement. > * `Requirement.marker` ([`packaging.markers.Marker`](https://packaging.pypa.io/en/latest/markers/#packaging.markers.Marker)): A `Marker` of the marker for the requirement. Can be `None`. + > * `Requirement.hashes` (`dict`): A mapping of hashes for the requirement, corresponding to `--hash=...` options. + > * `Requirement.filename` (`str`): The filename that the requirement originates from. + > * `Requirement.lineno` (`int`): The source line that the requirement was parsed from. + > > Optionally takes an `options` parameter to override the regex used to skip requirements lines. > Optionally takes an `include_invalid` parameter to return an `UnparsedRequirement` in the event that a requirement cannot be parsed correctly. + > Optionally takes a `strict_hashes` parameter to require that all requirements have associated hashes. ### Available with `pip>=8.0.0`: * `pip_api.hash(filename, algorithm='sha256')` diff --git a/pip_api/__init__.py b/pip_api/__init__.py index c4d9c22..28e3372 100644 --- a/pip_api/__init__.py +++ b/pip_api/__init__.py @@ -14,4 +14,8 @@ from pip_api._installed_distributions import installed_distributions # Import these whenever, doesn't matter -from pip_api._parse_requirements import parse_requirements +from pip_api._parse_requirements import ( + Requirement, + UnparsedRequirement, + parse_requirements, +) diff --git a/pip_api/_parse_requirements.py b/pip_api/_parse_requirements.py index 9a9ea61..a23d24d 100644 --- a/pip_api/_parse_requirements.py +++ b/pip_api/_parse_requirements.py @@ -7,6 +7,8 @@ import string import sys +from collections import defaultdict + from typing import Any, Dict, Optional, Union, Tuple from urllib.parse import urljoin, unquote, urlsplit @@ -24,6 +26,7 @@ parser.add_argument("-i", "--index-url") parser.add_argument("--extra-index-url") parser.add_argument("-f", "--find-links") +parser.add_argument("--hash", action="append", dest="hashes") operators = specifiers.Specifier._operators.keys() @@ -37,6 +40,8 @@ re.VERBOSE, ) WINDOWS = sys.platform.startswith("win") or (sys.platform == "cli" and os.name == "nt") +# https://pip.pypa.io/en/stable/cli/pip_hash/ +VALID_HASHES = {"sha256", "sha384", "sha512"} class Link: @@ -172,6 +177,15 @@ def _url_to_path(url): return path +class Requirement(requirements.Requirement): + def __init__(self, *args, **kwargs): + self.hashes = kwargs.pop("hashes", None) + self.filename = kwargs.pop("filename") + self.lineno = kwargs.pop("lineno") + + super().__init__(*args, **kwargs) + + class UnparsedRequirement(object): def __init__(self, name, msg, filename, lineno): self.name = name @@ -445,8 +459,11 @@ def _parse_requirement_url(req_str): def parse_requirements( - filename: os.PathLike, options: Optional[Any] = None, include_invalid: bool = False -) -> Dict[str, Union[requirements.Requirement, UnparsedRequirement]]: + filename: os.PathLike, + options: Optional[Any] = None, + include_invalid: bool = False, + strict_hashes: bool = False, +) -> Dict[str, Union[Requirement, UnparsedRequirement]]: to_parse = {filename} parsed = set() name_to_req = {} @@ -463,8 +480,20 @@ def parse_requirements( lines_enum = _skip_regex(lines_enum, options) for lineno, line in lines_enum: - req: Optional[Union[requirements.Requirement, UnparsedRequirement]] = None + req: Optional[Union[Requirement, UnparsedRequirement]] = None known, _ = parser.parse_known_args(line.strip().split()) + + hashes_by_kind = defaultdict(list) + if known.hashes: + for hsh in known.hashes: + kind, hsh = hsh.split(":", 1) + if kind not in VALID_HASHES: + raise PipError( + "Invalid --hash kind %s, expected one of %s" + % (kind, VALID_HASHES) + ) + hashes_by_kind[kind].append(hsh) + if known.req: req_str = str().join(known.req) try: @@ -477,7 +506,12 @@ def parse_requirements( try: # Try to parse this as a requirement specification if req is None: - req = requirements.Requirement(parsed_req_str) + req = Requirement( + parsed_req_str, + hashes=dict(hashes_by_kind), + filename=filename, + lineno=lineno, + ) except requirements.InvalidRequirement: try: _check_invalid_requirement(req_str) @@ -493,7 +527,9 @@ def parse_requirements( to_parse.add(full_path) elif known.editable: name, url = _parse_editable(known.editable) - req = requirements.Requirement("%s @ %s" % (name, url)) + req = Requirement( + "%s @ %s" % (name, url), filename=filename, lineno=lineno + ) else: pass # This is an invalid requirement @@ -512,4 +548,12 @@ def parse_requirements( % (req, name_to_req[req.name], req.name) ) + if strict_hashes: + missing_hashes = [req for req in name_to_req.values() if not req.hashes] + if len(missing_hashes) > 0: + raise PipError( + "Missing hashes for requirement in %s, line %s" + % (missing_hashes[0].filename, missing_hashes[0].lineno) + ) + return name_to_req diff --git a/tests/test_parse_requirements.py b/tests/test_parse_requirements.py index 77830ab..d915fd9 100644 --- a/tests/test_parse_requirements.py +++ b/tests/test_parse_requirements.py @@ -14,6 +14,7 @@ def test_parse_requirements(monkeypatch): assert set(result) == {"foo"} assert str(result["foo"]) == "foo==1.2.3" + assert result["foo"].hashes == {} def test_parse_requirements_with_comments(monkeypatch): @@ -253,3 +254,117 @@ def test_parse_requirements_with_missing_egg_suffix(monkeypatch): PipError, match=r"Missing egg fragment in URL: " + PEP508_PIP_EXAMPLE_URL ): pip_api.parse_requirements("a.txt") + + +def test_parse_requirements_hashes(monkeypatch): + files = { + "a.txt": [ + "foo==1.2.3 " + "--hash=sha256:862db587c4257f71293cf07cafc521961712c088a52981f3d81be056eaabc95e " + "--hash=sha256:0cfea7e5a53d5a256b4e8609c8a1812ad9af5c611432ec9dccbb4d79dc6a336e " + "--hash=sha384:673546e6c3236a36e5db5f1bc9d2cb5f3f974d3d4e9031f405b1dc7874575e2ad91436d02edf8237a889ab1cecb35d56 " + "--hash=sha512:3b149832490a704091abed6a9bd40ef7f4176b279263d4cbbb440b067ced99cadc006c03bc47488755351022fb49f2f10edfec110f027039bda703d407135c47" + ] + } + monkeypatch.setattr(pip_api._parse_requirements, "_read_file", files.get) + + result = pip_api.parse_requirements("a.txt") + + assert set(result) == {"foo"} + assert result["foo"].hashes == { + "sha256": [ + "862db587c4257f71293cf07cafc521961712c088a52981f3d81be056eaabc95e", + "0cfea7e5a53d5a256b4e8609c8a1812ad9af5c611432ec9dccbb4d79dc6a336e", + ], + "sha384": [ + "673546e6c3236a36e5db5f1bc9d2cb5f3f974d3d4e9031f405b1dc7874575e2ad91436d02edf8237a889ab1cecb35d56" + ], + "sha512": [ + "3b149832490a704091abed6a9bd40ef7f4176b279263d4cbbb440b067ced99cadc006c03bc47488755351022fb49f2f10edfec110f027039bda703d407135c47" + ], + } + + +def test_parse_requirements_invalid_hash_kind(monkeypatch): + files = {"a.txt": ["foo==1.2.3 --hash=md5:0d5a28f01dccb5a549c31016883f59c2"]} + monkeypatch.setattr(pip_api._parse_requirements, "_read_file", files.get) + + with pytest.raises(PipError, match=r"Invalid --hash kind"): + pip_api.parse_requirements("a.txt") + + +@pytest.mark.parametrize( + "strict_hashes", + (True, False), +) +def test_parse_requirements_missing_hashes(monkeypatch, strict_hashes): + files = { + "a.txt": [ + "foo==1.2.3 --hash=sha256:862db587c4257f71293cf07cafc521961712c088a52981f3d81be056eaabc95e\n", + "bar==1.2.3\n", + ] + } + monkeypatch.setattr(pip_api._parse_requirements, "_read_file", files.get) + + if strict_hashes: + with pytest.raises( + PipError, match=r"Missing hashes for requirement in a\.txt, line 2" + ): + pip_api.parse_requirements("a.txt", strict_hashes=strict_hashes) + else: + result = pip_api.parse_requirements("a.txt", strict_hashes=strict_hashes) + + assert result["foo"].hashes == { + "sha256": [ + "862db587c4257f71293cf07cafc521961712c088a52981f3d81be056eaabc95e" + ], + } + assert result["bar"].hashes == {} + + +@pytest.mark.parametrize( + "strict_hashes", + (True, False), +) +def test_parse_requirements_missing_hashes_late(monkeypatch, strict_hashes): + files = { + "a.txt": [ + "foo==1.2.3\n", + "bar==1.2.3\n", + "baz==1.2.3 --hash=sha256:862db587c4257f71293cf07cafc521961712c088a52981f3d81be056eaabc95e\n", + ] + } + monkeypatch.setattr(pip_api._parse_requirements, "_read_file", files.get) + + if strict_hashes: + with pytest.raises( + PipError, match=r"Missing hashes for requirement in a\.txt, line 1" + ): + pip_api.parse_requirements("a.txt", strict_hashes=strict_hashes) + else: + result = pip_api.parse_requirements("a.txt", strict_hashes=strict_hashes) + + assert result["foo"].hashes == {} + assert result["bar"].hashes == {} + assert result["baz"].hashes == { + "sha256": [ + "862db587c4257f71293cf07cafc521961712c088a52981f3d81be056eaabc95e" + ], + } + + +def test_parse_requirements_missing_all_hashes_strict(monkeypatch): + files = { + "a.txt": [ + "foo==1.2.3\n", + "bar==1.2.3\n", + "baz==1.2.3\n", + ] + } + + monkeypatch.setattr(pip_api._parse_requirements, "_read_file", files.get) + + with pytest.raises( + PipError, match=r"Missing hashes for requirement in a\.txt, line 1" + ): + pip_api.parse_requirements("a.txt", strict_hashes=True)