diff --git a/news/7269.bugfix.rst b/news/7269.bugfix.rst new file mode 100644 index 00000000000..6fb4ba3af65 --- /dev/null +++ b/news/7269.bugfix.rst @@ -0,0 +1,3 @@ +Ignore ``.dist-info`` directories if the stem is not a valid Python distribution +name. These are not usable as installation metadata anyway, and excluding them +prevents their contents from causing cryptic errors in other parts of pip. diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index 724b0c04494..895cabef29f 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -1,3 +1,5 @@ +import logging +import re from typing import Container, Iterator, List, Optional from pip._vendor.packaging.version import _BaseVersion @@ -5,7 +7,21 @@ from pip._internal.utils.misc import stdlib_pkgs # TODO: Move definition here. +logger = logging.getLogger(__name__) + + class BaseDistribution: + @property + def location(self): + # type: () -> Optional[str] + """Where the distribution is loaded from. + + A string value is not necessarily a filesystem path, since distributions + can be loaded from other sources, e.g. arbitrary zip archives. ``None`` + means the distribution is created in-memory. + """ + raise NotImplementedError() + @property def metadata_version(self): # type: () -> Optional[str] @@ -61,10 +77,37 @@ def get_distribution(self, name): """Given a requirement name, return the installed distributions.""" raise NotImplementedError() + def _iter_distributions(self): + # type: () -> Iterator[BaseDistribution] + """Iterate through installed distributions. + + This function should be implemented by subclass, but never called + directly. Use the public ``iter_distribution()`` instead, which + implements additional logic to make sure the distributions are valid. + """ + raise NotImplementedError() + def iter_distributions(self): # type: () -> Iterator[BaseDistribution] """Iterate through installed distributions.""" - raise NotImplementedError() + for dist in self._iter_distributions(): + # Make sure the distribution actually comes from a valid Python + # packaging distribution. Pip's AdjacentTempDirectory leaves folders + # e.g. ``~atplotlib.dist-info`` if cleanup was interrupted. The + # valid project name pattern is taken from PEP 508. + project_name_valid = re.match( + r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", + dist.canonical_name, + flags=re.IGNORECASE, + ) + if not project_name_valid: + logger.warning( + "Ignoring invalid distribution %s (%s)", + dist.canonical_name, + dist.location, + ) + continue + yield dist def iter_installed_distributions( self, diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py index d2fb29e2e9a..5cd9eaee641 100644 --- a/src/pip/_internal/metadata/pkg_resources.py +++ b/src/pip/_internal/metadata/pkg_resources.py @@ -24,6 +24,11 @@ def from_wheel(cls, path, name): dist = pkg_resources_distribution_for_wheel(zf, name, path) return cls(dist) + @property + def location(self): + # type: () -> Optional[str] + return self._dist.location + @property def metadata_version(self): # type: () -> Optional[str] @@ -115,7 +120,7 @@ def get_distribution(self, name): return None return self._search_distribution(name) - def iter_distributions(self): + def _iter_distributions(self): # type: () -> Iterator[BaseDistribution] for dist in self._ws: yield Distribution(dist) diff --git a/tests/functional/test_freeze.py b/tests/functional/test_freeze.py index 4fd91b5d8e5..dbda48a92b4 100644 --- a/tests/functional/test_freeze.py +++ b/tests/functional/test_freeze.py @@ -5,6 +5,7 @@ from doctest import ELLIPSIS, OutputChecker import pytest +from packaging.utils import canonicalize_name from tests.lib import ( _create_test_package, @@ -128,26 +129,23 @@ def fake_install(pkgname, dest): ) for pkgname in valid_pkgnames + invalid_pkgnames: fake_install(pkgname, script.site_packages_path) + result = script.pip('freeze', expect_stderr=True) - for pkgname in valid_pkgnames: - _check_output( - result.stdout, - '...{}==1.0...'.format(pkgname.replace('_', '-')) - ) - for pkgname in invalid_pkgnames: - # Check that the full distribution repr is present. - dist_repr = '{} 1.0 ('.format(pkgname.replace('_', '-')) - expected = ( - '...Could not generate requirement for ' - 'distribution {}...'.format(dist_repr) - ) - _check_output(result.stderr, expected) - # Also check that the parse error details occur at least once. - # We only need to find one occurrence to know that exception details - # are logged. - expected = '...site-packages): Parse error at "...' - _check_output(result.stderr, expected) + # Check all valid names are in the output. + output_lines = {line.strip() for line in result.stdout.splitlines()} + for name in valid_pkgnames: + f"{canonicalize_name(name)}==1.0" in output_lines + + # Check all invalid names are excluded from the output. + canonical_invalid_names = {canonicalize_name(n) for n in invalid_pkgnames} + for line in output_lines: + output_name, _, _ = line.partition("=") + assert canonicalize_name(output_name) not in canonical_invalid_names + + # The invalid names should be logged. + for name in canonical_invalid_names: + assert f"Ignoring invalid distribution {name} (" in result.stderr @pytest.mark.git