diff --git a/Doc/library/importlib.metadata.rst b/Doc/library/importlib.metadata.rst index 6e084101995e259..b306d5f55a714f2 100644 --- a/Doc/library/importlib.metadata.rst +++ b/Doc/library/importlib.metadata.rst @@ -308,6 +308,10 @@ Python module or `Import Package >> packages_distributions() {'importlib_metadata': ['importlib-metadata'], 'yaml': ['PyYAML'], 'jaraco': ['jaraco.classes', 'jaraco.functools'], ...} +Some editable installs, `do not supply top-level names +`_, and thus this +function is not reliable with such installs. + .. versionadded:: 3.10 .. _distributions: diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index 40ab1a1aaac3289..54194a69a1a6ac9 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -12,7 +12,9 @@ import functools import itertools import posixpath +import contextlib import collections +import inspect from . import _adapters, _meta from ._collections import FreezableDefaultDict, Pair @@ -24,7 +26,7 @@ from importlib import import_module from importlib.abc import MetaPathFinder from itertools import starmap -from typing import List, Mapping, Optional +from typing import List, Mapping, Optional, cast __all__ = [ @@ -341,11 +343,11 @@ def __repr__(self): return f'' -class Distribution: +class Distribution(metaclass=abc.ABCMeta): """A Python distribution package.""" @abc.abstractmethod - def read_text(self, filename): + def read_text(self, filename) -> Optional[str]: """Attempt to load metadata file given by the name. :param filename: The name of the file in the distribution info. @@ -419,7 +421,7 @@ def metadata(self) -> _meta.PackageMetadata: The returned object will have keys that name the various bits of metadata. See PEP 566 for details. """ - text = ( + opt_text = ( self.read_text('METADATA') or self.read_text('PKG-INFO') # This last clause is here to support old egg-info files. Its @@ -427,6 +429,7 @@ def metadata(self) -> _meta.PackageMetadata: # (which points to the egg-info file) attribute unchanged. or self.read_text('') ) + text = cast(str, opt_text) return _adapters.Message(email.message_from_string(text)) @property @@ -455,8 +458,8 @@ def files(self): :return: List of PackagePath for this distribution or None Result is `None` if the metadata file that enumerates files - (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is - missing. + (i.e. RECORD for dist-info, or installed-files.txt or + SOURCES.txt for egg-info) is missing. Result may be empty if the metadata exists but is empty. """ @@ -469,9 +472,19 @@ def make_file(name, hash=None, size_str=None): @pass_none def make_files(lines): - return list(starmap(make_file, csv.reader(lines))) + return starmap(make_file, csv.reader(lines)) - return make_files(self._read_files_distinfo() or self._read_files_egginfo()) + @pass_none + def skip_missing_files(package_paths): + return list(filter(lambda path: path.locate().exists(), package_paths)) + + return skip_missing_files( + make_files( + self._read_files_distinfo() + or self._read_files_egginfo_installed() + or self._read_files_egginfo_sources() + ) + ) def _read_files_distinfo(self): """ @@ -480,10 +493,43 @@ def _read_files_distinfo(self): text = self.read_text('RECORD') return text and text.splitlines() - def _read_files_egginfo(self): + def _read_files_egginfo_installed(self): """ - SOURCES.txt might contain literal commas, so wrap each line - in quotes. + Read installed-files.txt and return lines in a similar + CSV-parsable format as RECORD: each file must be placed + relative to the site-packages directory, and must also be + quoted (since file names can contain literal commas). + + This file is written when the package is installed by pip, + but it might not be written for other installation methods. + Hence, even if we can assume that this file is accurate + when it exists, we cannot assume that it always exists. + """ + text = self.read_text('installed-files.txt') + # We need to prepend the .egg-info/ subdir to the lines in this file. + # But this subdir is only available in the PathDistribution's self._path + # which is not easily accessible from this base class... + subdir = getattr(self, '_path', None) + if not text or not subdir: + return + with contextlib.suppress(Exception): + ret = [ + str((subdir / line).resolve().relative_to(self.locate_file(''))) + for line in text.splitlines() + ] + return map('"{}"'.format, ret) + + def _read_files_egginfo_sources(self): + """ + Read SOURCES.txt and return lines in a similar CSV-parsable + format as RECORD: each file name must be quoted (since it + might contain literal commas). + + Note that SOURCES.txt is not a reliable source for what + files are installed by a package. This file is generated + for a source archive, and the files that are present + there (e.g. setup.py) may not correctly reflect the files + that are present after the package has been installed. """ text = self.read_text('SOURCES.txt') return text and map('"{}"'.format, text.splitlines()) @@ -886,8 +932,13 @@ def _top_level_declared(dist): def _top_level_inferred(dist): - return { - f.parts[0] if len(f.parts) > 1 else f.with_suffix('').name + opt_names = { + f.parts[0] if len(f.parts) > 1 else inspect.getmodulename(f) for f in always_iterable(dist.files) - if f.suffix == ".py" } + + @pass_none + def importable_name(name): + return '.' not in name + + return filter(importable_name, opt_names) diff --git a/Lib/importlib/metadata/_adapters.py b/Lib/importlib/metadata/_adapters.py index aa460d3eda50fbb..6aed69a30857e42 100644 --- a/Lib/importlib/metadata/_adapters.py +++ b/Lib/importlib/metadata/_adapters.py @@ -1,3 +1,5 @@ +import functools +import warnings import re import textwrap import email.message @@ -5,6 +7,15 @@ from ._text import FoldedCase +# Do not remove prior to 2024-01-01 or Python 3.14 +_warn = functools.partial( + warnings.warn, + "Implicit None on return values is deprecated and will raise KeyErrors.", + DeprecationWarning, + stacklevel=2, +) + + class Message(email.message.Message): multiple_use_keys = set( map( @@ -39,6 +50,16 @@ def __init__(self, *args, **kwargs): def __iter__(self): return super().__iter__() + def __getitem__(self, item): + """ + Warn users that a ``KeyError`` can be expected when a + mising key is supplied. Ref python/importlib_metadata#371. + """ + res = super().__getitem__(item) + if res is None: + _warn() + return res + def _repair_headers(self): def redent(value): "Correct for RFC822 indentation" diff --git a/Lib/importlib/metadata/_meta.py b/Lib/importlib/metadata/_meta.py index d5c0576194ece23..c9a7ef906a8a8c2 100644 --- a/Lib/importlib/metadata/_meta.py +++ b/Lib/importlib/metadata/_meta.py @@ -1,4 +1,5 @@ -from typing import Any, Dict, Iterator, List, Protocol, TypeVar, Union +from typing import Protocol +from typing import Any, Dict, Iterator, List, Optional, TypeVar, Union, overload _T = TypeVar("_T") @@ -17,7 +18,21 @@ def __getitem__(self, key: str) -> str: def __iter__(self) -> Iterator[str]: ... # pragma: no cover - def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]: + @overload + def get(self, name: str, failobj: None = None) -> Optional[str]: + ... # pragma: no cover + + @overload + def get(self, name: str, failobj: _T) -> Union[str, _T]: + ... # pragma: no cover + + # overload per python/importlib_metadata#435 + @overload + def get_all(self, name: str, failobj: None = None) -> Optional[List[Any]]: + ... # pragma: no cover + + @overload + def get_all(self, name: str, failobj: _T) -> Union[List[Any], _T]: """ Return all values associated with a possibly multi-valued key. """ @@ -29,18 +44,19 @@ def json(self) -> Dict[str, Union[str, List[str]]]: """ -class SimplePath(Protocol): +class SimplePath(Protocol[_T]): """ A minimal subset of pathlib.Path required by PathDistribution. """ - def joinpath(self) -> 'SimplePath': + def joinpath(self) -> _T: ... # pragma: no cover - def __truediv__(self) -> 'SimplePath': + def __truediv__(self, other: Union[str, _T]) -> _T: ... # pragma: no cover - def parent(self) -> 'SimplePath': + @property + def parent(self) -> _T: ... # pragma: no cover def read_text(self) -> str: diff --git a/Lib/test/test_importlib/fixtures.py b/Lib/test/test_importlib/fixtures.py index e7be77b3957c670..a364a977bce781e 100644 --- a/Lib/test/test_importlib/fixtures.py +++ b/Lib/test/test_importlib/fixtures.py @@ -10,7 +10,10 @@ from test.support.os_helper import FS_NONASCII from test.support import requires_zlib -from typing import Dict, Union + +from . import _path +from ._path import FilesSpec + try: from importlib import resources # type: ignore @@ -83,13 +86,8 @@ def setUp(self): self.fixtures.enter_context(self.add_sys_path(self.site_dir)) -# Except for python/mypy#731, prefer to define -# FilesDef = Dict[str, Union['FilesDef', str]] -FilesDef = Dict[str, Union[Dict[str, Union[Dict[str, str], str]], str]] - - class DistInfoPkg(OnSysPath, SiteDir): - files: FilesDef = { + files: FilesSpec = { "distinfo_pkg-1.0.0.dist-info": { "METADATA": """ Name: distinfo-pkg @@ -131,7 +129,7 @@ def make_uppercase(self): class DistInfoPkgWithDot(OnSysPath, SiteDir): - files: FilesDef = { + files: FilesSpec = { "pkg_dot-1.0.0.dist-info": { "METADATA": """ Name: pkg.dot @@ -146,7 +144,7 @@ def setUp(self): class DistInfoPkgWithDotLegacy(OnSysPath, SiteDir): - files: FilesDef = { + files: FilesSpec = { "pkg.dot-1.0.0.dist-info": { "METADATA": """ Name: pkg.dot @@ -173,7 +171,7 @@ def setUp(self): class EggInfoPkg(OnSysPath, SiteDir): - files: FilesDef = { + files: FilesSpec = { "egginfo_pkg.egg-info": { "PKG-INFO": """ Name: egginfo-pkg @@ -212,8 +210,99 @@ def setUp(self): build_files(EggInfoPkg.files, prefix=self.site_dir) +class EggInfoPkgPipInstalledNoToplevel(OnSysPath, SiteDir): + files: FilesSpec = { + "egg_with_module_pkg.egg-info": { + "PKG-INFO": "Name: egg_with_module-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + egg_with_module.py + setup.py + egg_with_module_pkg.egg-info/PKG-INFO + egg_with_module_pkg.egg-info/SOURCES.txt + egg_with_module_pkg.egg-info/top_level.txt + """, + # installed-files.txt is written by pip, and is a strictly more + # accurate source than SOURCES.txt as to the installed contents of + # the package. + "installed-files.txt": """ + ../egg_with_module.py + PKG-INFO + SOURCES.txt + top_level.txt + """, + # missing top_level.txt (to trigger fallback to installed-files.txt) + }, + "egg_with_module.py": """ + def main(): + print("hello world") + """, + } + + def setUp(self): + super().setUp() + build_files(EggInfoPkgPipInstalledNoToplevel.files, prefix=self.site_dir) + + +class EggInfoPkgPipInstalledNoModules(OnSysPath, SiteDir): + files: FilesSpec = { + "egg_with_no_modules_pkg.egg-info": { + "PKG-INFO": "Name: egg_with_no_modules-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + setup.py + egg_with_no_modules_pkg.egg-info/PKG-INFO + egg_with_no_modules_pkg.egg-info/SOURCES.txt + egg_with_no_modules_pkg.egg-info/top_level.txt + """, + # installed-files.txt is written by pip, and is a strictly more + # accurate source than SOURCES.txt as to the installed contents of + # the package. + "installed-files.txt": """ + PKG-INFO + SOURCES.txt + top_level.txt + """, + # top_level.txt correctly reflects that no modules are installed + "top_level.txt": b"\n", + }, + } + + def setUp(self): + super().setUp() + build_files(EggInfoPkgPipInstalledNoModules.files, prefix=self.site_dir) + + +class EggInfoPkgSourcesFallback(OnSysPath, SiteDir): + files: FilesSpec = { + "sources_fallback_pkg.egg-info": { + "PKG-INFO": "Name: sources_fallback-pkg", + # SOURCES.txt is made from the source archive, and contains files + # (setup.py) that are not present after installation. + "SOURCES.txt": """ + sources_fallback.py + setup.py + sources_fallback_pkg.egg-info/PKG-INFO + sources_fallback_pkg.egg-info/SOURCES.txt + """, + # missing installed-files.txt (i.e. not installed by pip) and + # missing top_level.txt (to trigger fallback to SOURCES.txt) + }, + "sources_fallback.py": """ + def main(): + print("hello world") + """, + } + + def setUp(self): + super().setUp() + build_files(EggInfoPkgSourcesFallback.files, prefix=self.site_dir) + + class EggInfoFile(OnSysPath, SiteDir): - files: FilesDef = { + files: FilesSpec = { "egginfo_file.egg-info": """ Metadata-Version: 1.0 Name: egginfo_file @@ -233,38 +322,22 @@ def setUp(self): build_files(EggInfoFile.files, prefix=self.site_dir) -def build_files(file_defs, prefix=pathlib.Path()): - """Build a set of files/directories, as described by the +# dedent all text strings before writing +orig = _path.create.registry[str] +_path.create.register(str, lambda content, path: orig(DALS(content), path)) - file_defs dictionary. Each key/value pair in the dictionary is - interpreted as a filename/contents pair. If the contents value is a - dictionary, a directory is created, and the dictionary interpreted - as the files within it, recursively. - For example: +build_files = _path.build - {"README.txt": "A README file", - "foo": { - "__init__.py": "", - "bar": { - "__init__.py": "", - }, - "baz.py": "# Some code", - } - } - """ - for name, contents in file_defs.items(): - full_name = prefix / name - if isinstance(contents, dict): - full_name.mkdir() - build_files(contents, prefix=full_name) - else: - if isinstance(contents, bytes): - with full_name.open('wb') as f: - f.write(contents) - else: - with full_name.open('w', encoding='utf-8') as f: - f.write(DALS(contents)) + +def build_record(file_defs): + return ''.join(f'{name},,\n' for name in record_names(file_defs)) + + +def record_names(file_defs): + recording = _path.Recording() + _path.build(file_defs, recording) + return recording.record class FileBuilder: diff --git a/Lib/test/test_importlib/test_main.py b/Lib/test/test_importlib/test_main.py index 30b68b6ae7d86ed..5f6a0d08c16f33e 100644 --- a/Lib/test/test_importlib/test_main.py +++ b/Lib/test/test_importlib/test_main.py @@ -2,6 +2,7 @@ import pickle import unittest import importlib.metadata +import itertools try: import pyfakefs.fake_filesystem_unittest as ffs @@ -37,7 +38,7 @@ def test_for_name_does_not_exist(self): def test_package_not_found_mentions_metadata(self): """ When a package is not found, that could indicate that the - packgae is not installed or that it is installed without + package is not installed or that it is installed without metadata. Ensure the exception mentions metadata to help guide users toward the cause. See #124. """ @@ -46,8 +47,9 @@ def test_package_not_found_mentions_metadata(self): assert "metadata" in str(ctx.exception) - def test_new_style_classes(self): - self.assertIsInstance(Distribution, type) + def test_abc_enforced(self): + with self.assertRaises(TypeError): + type('DistributionSubclass', (Distribution,), {})() @fixtures.parameterize( dict(name=None), @@ -172,11 +174,21 @@ def test_metadata_loads_egg_info(self): assert meta['Description'] == 'pôrˈtend' -class DiscoveryTests(fixtures.EggInfoPkg, fixtures.DistInfoPkg, unittest.TestCase): +class DiscoveryTests( + fixtures.EggInfoPkg, + fixtures.EggInfoPkgPipInstalledNoToplevel, + fixtures.EggInfoPkgPipInstalledNoModules, + fixtures.EggInfoPkgSourcesFallback, + fixtures.DistInfoPkg, + unittest.TestCase, +): def test_package_discovery(self): dists = list(distributions()) assert all(isinstance(dist, Distribution) for dist in dists) assert any(dist.metadata['Name'] == 'egginfo-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'egg_with_module-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'egg_with_no_modules-pkg' for dist in dists) + assert any(dist.metadata['Name'] == 'sources_fallback-pkg' for dist in dists) assert any(dist.metadata['Name'] == 'distinfo-pkg' for dist in dists) def test_invalid_usage(self): @@ -324,3 +336,79 @@ def test_packages_distributions_neither_toplevel_nor_files(self): prefix=self.site_dir, ) packages_distributions() + + def test_packages_distributions_all_module_types(self): + """ + Test top-level modules detected on a package without 'top-level.txt'. + """ + suffixes = importlib.machinery.all_suffixes() + metadata = dict( + METADATA=""" + Name: all_distributions + Version: 1.0.0 + """, + ) + files = { + 'all_distributions-1.0.0.dist-info': metadata, + } + for i, suffix in enumerate(suffixes): + files.update( + { + f'importable-name {i}{suffix}': '', + f'in_namespace_{i}': { + f'mod{suffix}': '', + }, + f'in_package_{i}': { + '__init__.py': '', + f'mod{suffix}': '', + }, + } + ) + metadata.update(RECORD=fixtures.build_record(files)) + fixtures.build_files(files, prefix=self.site_dir) + + distributions = packages_distributions() + + for i in range(len(suffixes)): + assert distributions[f'importable-name {i}'] == ['all_distributions'] + assert distributions[f'in_namespace_{i}'] == ['all_distributions'] + assert distributions[f'in_package_{i}'] == ['all_distributions'] + + assert not any(name.endswith('.dist-info') for name in distributions) + + +class PackagesDistributionsEggTest( + fixtures.EggInfoPkg, + fixtures.EggInfoPkgPipInstalledNoToplevel, + fixtures.EggInfoPkgPipInstalledNoModules, + fixtures.EggInfoPkgSourcesFallback, + unittest.TestCase, +): + def test_packages_distributions_on_eggs(self): + """ + Test old-style egg packages with a variation of 'top_level.txt', + 'SOURCES.txt', and 'installed-files.txt', available. + """ + distributions = packages_distributions() + + def import_names_from_package(package_name): + return { + import_name + for import_name, package_names in distributions.items() + if package_name in package_names + } + + # egginfo-pkg declares one import ('mod') via top_level.txt + assert import_names_from_package('egginfo-pkg') == {'mod'} + + # egg_with_module-pkg has one import ('egg_with_module') inferred from + # installed-files.txt (top_level.txt is missing) + assert import_names_from_package('egg_with_module-pkg') == {'egg_with_module'} + + # egg_with_no_modules-pkg should not be associated with any import names + # (top_level.txt is empty, and installed-files.txt has no .py files) + assert import_names_from_package('egg_with_no_modules-pkg') == set() + + # sources_fallback-pkg has one import ('sources_fallback') inferred from + # SOURCES.txt (top_level.txt and installed-files.txt is missing) + assert import_names_from_package('sources_fallback-pkg') == {'sources_fallback'} diff --git a/Lib/test/test_importlib/test_metadata_api.py b/Lib/test/test_importlib/test_metadata_api.py index 71c47e62d271248..33c6e85ee947535 100644 --- a/Lib/test/test_importlib/test_metadata_api.py +++ b/Lib/test/test_importlib/test_metadata_api.py @@ -27,12 +27,14 @@ def suppress_known_deprecation(): class APITests( fixtures.EggInfoPkg, + fixtures.EggInfoPkgPipInstalledNoToplevel, + fixtures.EggInfoPkgPipInstalledNoModules, + fixtures.EggInfoPkgSourcesFallback, fixtures.DistInfoPkg, fixtures.DistInfoPkgWithDot, fixtures.EggInfoFile, unittest.TestCase, ): - version_pattern = r'\d+\.\d+(\.\d)?' def test_retrieves_version_of_self(self): @@ -63,15 +65,28 @@ def test_prefix_not_matched(self): distribution(prefix) def test_for_top_level(self): - self.assertEqual( - distribution('egginfo-pkg').read_text('top_level.txt').strip(), 'mod' - ) + tests = [ + ('egginfo-pkg', 'mod'), + ('egg_with_no_modules-pkg', ''), + ] + for pkg_name, expect_content in tests: + with self.subTest(pkg_name): + self.assertEqual( + distribution(pkg_name).read_text('top_level.txt').strip(), + expect_content, + ) def test_read_text(self): - top_level = [ - path for path in files('egginfo-pkg') if path.name == 'top_level.txt' - ][0] - self.assertEqual(top_level.read_text(), 'mod\n') + tests = [ + ('egginfo-pkg', 'mod\n'), + ('egg_with_no_modules-pkg', '\n'), + ] + for pkg_name, expect_content in tests: + with self.subTest(pkg_name): + top_level = [ + path for path in files(pkg_name) if path.name == 'top_level.txt' + ][0] + self.assertEqual(top_level.read_text(), expect_content) def test_entry_points(self): eps = entry_points() @@ -137,6 +152,28 @@ def test_metadata_for_this_package(self): classifiers = md.get_all('Classifier') assert 'Topic :: Software Development :: Libraries' in classifiers + def test_missing_key_legacy(self): + """ + Requesting a missing key will still return None, but warn. + """ + md = metadata('distinfo-pkg') + with suppress_known_deprecation(): + assert md['does-not-exist'] is None + + def test_get_key(self): + """ + Getting a key gets the key. + """ + md = metadata('egginfo-pkg') + assert md.get('Name') == 'egginfo-pkg' + + def test_get_missing_key(self): + """ + Requesting a missing key will return None. + """ + md = metadata('distinfo-pkg') + assert md.get('does-not-exist') is None + @staticmethod def _test_files(files): root = files[0].root @@ -159,6 +196,9 @@ def test_files_dist_info(self): def test_files_egg_info(self): self._test_files(files('egginfo-pkg')) + self._test_files(files('egg_with_module-pkg')) + self._test_files(files('egg_with_no_modules-pkg')) + self._test_files(files('sources_fallback-pkg')) def test_version_egg_info_file(self): self.assertEqual(version('egginfo-file'), '0.1')