From dae610b9cad8e355eec388025807bbedc244c5de Mon Sep 17 00:00:00 2001 From: Matt Oberle Date: Tue, 21 Jun 2022 19:29:38 -0400 Subject: [PATCH] Use PEP 426 rules when setting deps from extras (#724) * Use PEP 426 rules when setting deps from extras This commit addresses issue #720. [PEP 426](https://peps.python.org/pep-0426/#name) states that distribution names are case-insensitive and "-" is interchangeable with "_". The `pip-compile` command creates a lockfile where all package names are lowercase. The tool may also modify interchangeable characters. The following examples are all valid `requirements.txt` or `requirements_lock.txt` entries: ``` SQLAlchemy[postgresql_psycopg2binary]==1.4.36 sqlalchemy[postgresql_psycopg2binary]==1.4.36 sentry_sdk[flask]==1.5.8 sentry-sdk[flask]==1.5.8 ``` A distribution's `METADATA` file contains the stylization chosen by the publisher. By applying a "sanitise" function when building the `extras` dict and when performing lookups we can eliminate this difference as a concern. * Use PEP 503 rules when sanitising extras * Normalize distribution name with pkg_resources `pypa/installer` is used to parse Wheel metadata, but does not currently provide a method for normalizing distribution names: - https://github.com/pypa/installer/issues/97 `pypa/pkg_resources` provides `Requirement.parse` which returns an instance of `Requirement` where `.key` is the canonical distribution name per PEP 503. The `Requirement` class can also parse `extras`, but it returns a normalized form that I believe could break the installation of the extras. * Use Requirement.parse to populate extra reqs * Revert "Use Requirement.parse to populate extra reqs" This reverts commit f0faa9795a219d865d579da430806598b8779753. * Test for distribution name normalization in extras * Replace pkg_resources with packaging.utils This replaces `pkg_resources.Requirement.parse` with `packaging.utils.canonicalize_name`. 
Doing this pulls in a vendored requirement from `pip`, which may be undesirable. The code we want is just: ``` re.sub(r"[-_.]+", "-", name).lower() ``` This commit also leaves a reference to `pkg_resources` in `wheel.py` which does not canonicalize the name. Co-authored-by: Jonathon Belotti Co-authored-by: Alex Eagle --- python/pip_install/extract_wheels/lib/requirements.py | 4 +++- python/pip_install/extract_wheels/lib/requirements_test.py | 2 ++ python/pip_install/extract_wheels/lib/wheel.py | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/python/pip_install/extract_wheels/lib/requirements.py b/python/pip_install/extract_wheels/lib/requirements.py index cfab339673..caf20d0f79 100644 --- a/python/pip_install/extract_wheels/lib/requirements.py +++ b/python/pip_install/extract_wheels/lib/requirements.py @@ -1,6 +1,8 @@ import re from typing import Dict, Optional, Set, Tuple +from pip._vendor.packaging.utils import canonicalize_name + def parse_extras(requirements_path: str) -> Dict[str, Set[str]]: """Parse over the requirements.txt file to find extras requested. 
@@ -38,7 +40,7 @@ def _parse_requirement_for_extra( matches = extras_pattern.match(requirement) if matches: return ( - matches.group(1), + canonicalize_name(matches.group(1)), {extra.strip() for extra in matches.group(2).split(",")}, ) diff --git a/python/pip_install/extract_wheels/lib/requirements_test.py b/python/pip_install/extract_wheels/lib/requirements_test.py index 0ee425571f..4fe4d92e32 100644 --- a/python/pip_install/extract_wheels/lib/requirements_test.py +++ b/python/pip_install/extract_wheels/lib/requirements_test.py @@ -9,6 +9,8 @@ def test_parses_requirement_for_extra(self) -> None: ("name[foo]", ("name", frozenset(["foo"]))), ("name[ Foo123 ]", ("name", frozenset(["Foo123"]))), (" name1[ foo ] ", ("name1", frozenset(["foo"]))), + ("Name[foo]", ("name", frozenset(["foo"]))), + ("name_foo[bar]", ("name-foo", frozenset(["bar"]))), ( "name [fred,bar] @ http://foo.com ; python_version=='2.7'", ("name", frozenset(["fred", "bar"])), diff --git a/python/pip_install/extract_wheels/lib/wheel.py b/python/pip_install/extract_wheels/lib/wheel.py index 74a963f382..6dab311637 100644 --- a/python/pip_install/extract_wheels/lib/wheel.py +++ b/python/pip_install/extract_wheels/lib/wheel.py @@ -8,6 +8,7 @@ import installer import pkg_resources +from pip._vendor.packaging.utils import canonicalize_name def current_umask() -> int: @@ -38,7 +39,8 @@ def path(self) -> str: @property def name(self) -> str: # TODO Also available as installer.sources.WheelSource.distribution - return str(self.metadata['Name']) + name = str(self.metadata['Name']) + return canonicalize_name(name) @property def metadata(self) -> email.message.Message: