From 99890636bfd9bb156655050f22f1df47e74ac3f6 Mon Sep 17 00:00:00 2001 From: Bruno Oliveira Date: Tue, 9 Apr 2024 13:21:51 -0300 Subject: [PATCH] Refine how we detect namespace packages (#12169) Previously we used a hand crafted approach to detect namespace packages, however we should rely on ``importlib`` to detect them for us. Fix #12112 --------- Co-authored-by: Ran Benita --- changelog/12112.improvement.rst | 1 + doc/en/reference/reference.rst | 3 +- src/_pytest/pathlib.py | 99 ++++++++++--- testing/test_pathlib.py | 244 ++++++++++++++++++++++++++++---- 4 files changed, 298 insertions(+), 49 deletions(-) create mode 100644 changelog/12112.improvement.rst diff --git a/changelog/12112.improvement.rst b/changelog/12112.improvement.rst new file mode 100644 index 00000000000..3f997b2af65 --- /dev/null +++ b/changelog/12112.improvement.rst @@ -0,0 +1 @@ +Improve namespace packages detection when :confval:`consider_namespace_packages` is enabled, covering more situations (like editable installs). diff --git a/doc/en/reference/reference.rst b/doc/en/reference/reference.rst index 21890fbf63e..c9d7aeb552c 100644 --- a/doc/en/reference/reference.rst +++ b/doc/en/reference/reference.rst @@ -1279,8 +1279,7 @@ passed multiple times. The expected format is ``name=value``. For example:: Controls if pytest should attempt to identify `namespace packages `__ when collecting Python modules. Default is ``False``. - Set to ``True`` if you are testing namespace packages installed into a virtual environment and it is important for - your packages to be imported using their full namespace package name. + Set to ``True`` if the package you are testing is part of a namespace package. Only `native namespace packages `__ are supported, with no plans to support `legacy namespace packages `__. diff --git a/src/_pytest/pathlib.py b/src/_pytest/pathlib.py index e39f4772326..254d9d9468e 100644 --- a/src/_pytest/pathlib.py +++ b/src/_pytest/pathlib.py @@ -8,6 +8,7 @@ from errno import ENOTDIR import fnmatch from functools import partial +from importlib.machinery import ModuleSpec import importlib.util import itertools import os @@ -628,11 +629,13 @@ def _import_module_using_spec( # such as our own assertion-rewrite hook. for meta_importer in sys.meta_path: spec = meta_importer.find_spec(module_name, [str(module_location)]) - if spec is not None: + if spec_matches_module_path(spec, module_path): break else: spec = importlib.util.spec_from_file_location(module_name, str(module_path)) - if spec is not None: + + if spec_matches_module_path(spec, module_path): + assert spec is not None mod = importlib.util.module_from_spec(spec) sys.modules[module_name] = mod spec.loader.exec_module(mod) # type: ignore[union-attr] @@ -643,6 +646,16 @@ def _import_module_using_spec( return None +def spec_matches_module_path( + module_spec: Optional[ModuleSpec], module_path: Path +) -> bool: + """Return true if the given ModuleSpec can be used to import the given module path.""" + if module_spec is None or module_spec.origin is None: + return False + + return Path(module_spec.origin) == module_path + + # Implement a special _is_same function on Windows which returns True if the two filenames # compare equal, to circumvent os.path.samefile returning False for mounts in UNC (#7678). if sys.platform.startswith("win"): @@ -762,39 +775,79 @@ def resolve_pkg_root_and_module_name( Passing the full path to `models.py` will yield Path("src") and "app.core.models". If consider_namespace_packages is True, then we additionally check upwards in the hierarchy - until we find a directory that is reachable from sys.path, which marks it as a namespace package: + for namespace packages: https://packaging.python.org/en/latest/guides/packaging-namespace-packages Raises CouldNotResolvePathError if the given path does not belong to a package (missing any __init__.py files). """ + pkg_root: Optional[Path] = None pkg_path = resolve_package_path(path) if pkg_path is not None: pkg_root = pkg_path.parent - # https://packaging.python.org/en/latest/guides/packaging-namespace-packages/ - if consider_namespace_packages: - # Go upwards in the hierarchy, if we find a parent path included - # in sys.path, it means the package found by resolve_package_path() - # actually belongs to a namespace package. - for parent in pkg_root.parents: - # If any of the parent paths has a __init__.py, it means it is not - # a namespace package (see the docs linked above). - if (parent / "__init__.py").is_file(): - break - if str(parent) in sys.path: - # Point the pkg_root to the root of the namespace package. - pkg_root = parent - break - - names = list(path.with_suffix("").relative_to(pkg_root).parts) - if names[-1] == "__init__": - names.pop() - module_name = ".".join(names) - return pkg_root, module_name + if consider_namespace_packages: + start = pkg_root if pkg_root is not None else path.parent + for candidate in (start, *start.parents): + module_name = compute_module_name(candidate, path) + if module_name and is_importable(module_name, path): + # Point the pkg_root to the root of the namespace package. + pkg_root = candidate + break + + if pkg_root is not None: + module_name = compute_module_name(pkg_root, path) + if module_name: + return pkg_root, module_name raise CouldNotResolvePathError(f"Could not resolve for {path}") +def is_importable(module_name: str, module_path: Path) -> bool: + """ + Return if the given module path could be imported normally by Python, akin to the user + entering the REPL and importing the corresponding module name directly, and corresponds + to the module_path specified. + + :param module_name: + Full module name that we want to check if is importable. + For example, "app.models". + + :param module_path: + Full path to the python module/package we want to check if is importable. + For example, "/projects/src/app/models.py". + """ + try: + # Note this is different from what we do in ``_import_module_using_spec``, where we explicitly search through + # sys.meta_path to be able to pass the path of the module that we want to import (``meta_importer.find_spec``). + # Using importlib.util.find_spec() is different, it gives the same results as trying to import + # the module normally in the REPL. + spec = importlib.util.find_spec(module_name) + except (ImportError, ValueError, ImportWarning): + return False + else: + return spec_matches_module_path(spec, module_path) + + +def compute_module_name(root: Path, module_path: Path) -> Optional[str]: + """Compute a module name based on a path and a root anchor.""" + try: + path_without_suffix = module_path.with_suffix("") + except ValueError: + # Empty paths (such as Path.cwd()) might break meta_path hooks (like our own assertion rewriter). + return None + + try: + relative = path_without_suffix.relative_to(root) + except ValueError: # pragma: no cover + return None + names = list(relative.parts) + if not names: + return None + if names[-1] == "__init__": + names.pop() + return ".".join(names) + + class CouldNotResolvePathError(Exception): """Custom exception raised by resolve_pkg_root_and_module_name.""" diff --git a/testing/test_pathlib.py b/testing/test_pathlib.py index 7f740a0607b..f96151bdd44 100644 --- a/testing/test_pathlib.py +++ b/testing/test_pathlib.py @@ -1,5 +1,7 @@ # mypy: allow-untyped-defs import errno +import importlib.abc +import importlib.machinery import os.path from pathlib import Path import pickle @@ -10,12 +12,15 @@ from typing import Any from typing import Generator from typing import Iterator +from typing import Optional +from typing import Sequence from typing import Tuple import unittest.mock from _pytest.monkeypatch import MonkeyPatch from _pytest.pathlib import bestrelpath from _pytest.pathlib import commonpath +from _pytest.pathlib import compute_module_name from _pytest.pathlib import CouldNotResolvePathError from _pytest.pathlib import ensure_deletable from _pytest.pathlib import fnmatch_ex @@ -25,6 +30,7 @@ from _pytest.pathlib import ImportMode from _pytest.pathlib import ImportPathMismatchError from _pytest.pathlib import insert_missing_modules +from _pytest.pathlib import is_importable from _pytest.pathlib import maybe_delete_a_numbered_dir from _pytest.pathlib import module_name_from_path from _pytest.pathlib import resolve_package_path @@ -33,6 +39,7 @@ from _pytest.pathlib import symlink_or_skip from _pytest.pathlib import visit from _pytest.pytester import Pytester +from _pytest.pytester import RunResult from _pytest.tmpdir import TempPathFactory import pytest @@ -717,12 +724,13 @@ def test_module_name_from_path(self, tmp_path: Path) -> None: assert result == "_env_310.tests.test_foo" def test_resolve_pkg_root_and_module_name( - self, tmp_path: Path, monkeypatch: MonkeyPatch + self, tmp_path: Path, monkeypatch: MonkeyPatch, pytester: Pytester ) -> None: # Create a directory structure first without __init__.py files. (tmp_path / "src/app/core").mkdir(parents=True) models_py = tmp_path / "src/app/core/models.py" models_py.touch() + with pytest.raises(CouldNotResolvePathError): _ = resolve_pkg_root_and_module_name(models_py) @@ -738,6 +746,8 @@ def test_resolve_pkg_root_and_module_name( # If we add tmp_path to sys.path, src becomes a namespace package. monkeypatch.syspath_prepend(tmp_path) + validate_namespace_package(pytester, [tmp_path], ["src.app.core.models"]) + assert resolve_pkg_root_and_module_name( models_py, consider_namespace_packages=True ) == ( @@ -1119,39 +1129,56 @@ def test_safe_exists(tmp_path: Path) -> None: class TestNamespacePackages: """Test import_path support when importing from properly namespace packages.""" + @pytest.fixture(autouse=True) + def setup_imports_tracking(self, monkeypatch: MonkeyPatch) -> None: + monkeypatch.setattr(sys, "pytest_namespace_packages_test", [], raising=False) + def setup_directories( - self, tmp_path: Path, monkeypatch: MonkeyPatch, pytester: Pytester + self, tmp_path: Path, monkeypatch: Optional[MonkeyPatch], pytester: Pytester ) -> Tuple[Path, Path]: + # Use a code to guard against modules being imported more than once. + # This is a safeguard in case future changes break this invariant. + code = dedent( + """ + import sys + imported = getattr(sys, "pytest_namespace_packages_test", []) + assert __name__ not in imported, f"{__name__} already imported" + imported.append(__name__) + sys.pytest_namespace_packages_test = imported + """ + ) + # Set up a namespace package "com.company", containing # two subpackages, "app" and "calc". (tmp_path / "src/dist1/com/company/app/core").mkdir(parents=True) - (tmp_path / "src/dist1/com/company/app/__init__.py").touch() - (tmp_path / "src/dist1/com/company/app/core/__init__.py").touch() + (tmp_path / "src/dist1/com/company/app/__init__.py").write_text( + code, encoding="UTF-8" + ) + (tmp_path / "src/dist1/com/company/app/core/__init__.py").write_text( + code, encoding="UTF-8" + ) models_py = tmp_path / "src/dist1/com/company/app/core/models.py" models_py.touch() (tmp_path / "src/dist2/com/company/calc/algo").mkdir(parents=True) - (tmp_path / "src/dist2/com/company/calc/__init__.py").touch() - (tmp_path / "src/dist2/com/company/calc/algo/__init__.py").touch() + (tmp_path / "src/dist2/com/company/calc/__init__.py").write_text( + code, encoding="UTF-8" + ) + (tmp_path / "src/dist2/com/company/calc/algo/__init__.py").write_text( + code, encoding="UTF-8" + ) algorithms_py = tmp_path / "src/dist2/com/company/calc/algo/algorithms.py" - algorithms_py.touch() + algorithms_py.write_text(code, encoding="UTF-8") - # Validate the namespace package by importing it in a Python subprocess. - r = pytester.runpython_c( - dedent( - f""" - import sys - sys.path.append(r{str(tmp_path / "src/dist1")!r}) - sys.path.append(r{str(tmp_path / "src/dist2")!r}) - import com.company.app.core.models - import com.company.calc.algo.algorithms - """ - ) + r = validate_namespace_package( + pytester, + [tmp_path / "src/dist1", tmp_path / "src/dist2"], + ["com.company.app.core.models", "com.company.calc.algo.algorithms"], ) assert r.ret == 0 - - monkeypatch.syspath_prepend(tmp_path / "src/dist1") - monkeypatch.syspath_prepend(tmp_path / "src/dist2") + if monkeypatch is not None: + monkeypatch.syspath_prepend(tmp_path / "src/dist1") + monkeypatch.syspath_prepend(tmp_path / "src/dist2") return models_py, algorithms_py @pytest.mark.parametrize("import_mode", ["prepend", "append", "importlib"]) @@ -1223,11 +1250,76 @@ def test_incorrect_namespace_package( models_py, algorithms_py = self.setup_directories( tmp_path, monkeypatch, pytester ) - # Namespace packages must not have an __init__.py at any of its - # directories; if it does, we then fall back to importing just the - # part of the package containing the __init__.py files. + # Namespace packages must not have an __init__.py at its top-level + # directory; if it does, it is no longer a namespace package, and we fall back + # to importing just the part of the package containing the __init__.py files. (tmp_path / "src/dist1/com/__init__.py").touch() + # Because of the __init__ file, 'com' is no longer a namespace package: + # 'com.company.app' is importable as a normal module. + # 'com.company.calc' is no longer importable because 'com' is not a namespace package anymore. + r = validate_namespace_package( + pytester, + [tmp_path / "src/dist1", tmp_path / "src/dist2"], + ["com.company.app.core.models", "com.company.calc.algo.algorithms"], + ) + assert r.ret == 1 + r.stderr.fnmatch_lines("*No module named 'com.company.calc*") + + pkg_root, module_name = resolve_pkg_root_and_module_name( + models_py, consider_namespace_packages=True + ) + assert (pkg_root, module_name) == ( + tmp_path / "src/dist1", + "com.company.app.core.models", + ) + + # dist2/com/company will contain a normal Python package. + pkg_root, module_name = resolve_pkg_root_and_module_name( + algorithms_py, consider_namespace_packages=True + ) + assert (pkg_root, module_name) == ( + tmp_path / "src/dist2/com/company", + "calc.algo.algorithms", + ) + + def test_detect_meta_path( + self, + tmp_path: Path, + monkeypatch: MonkeyPatch, + pytester: Pytester, + ) -> None: + """ + resolve_pkg_root_and_module_name() considers sys.meta_path when importing namespace packages. + + Regression test for #12112. + """ + + class CustomImporter(importlib.abc.MetaPathFinder): + """ + Imports the module name "com" as a namespace package. + + This ensures our namespace detection considers sys.meta_path, which is important + to support all possible ways a module can be imported (for example editable installs). + """ + + def find_spec( + self, name: str, path: Any = None, target: Any = None + ) -> Optional[importlib.machinery.ModuleSpec]: + if name == "com": + spec = importlib.machinery.ModuleSpec("com", loader=None) + spec.submodule_search_locations = [str(com_root_2), str(com_root_1)] + return spec + return None + + # Setup directories without configuring sys.path. + models_py, algorithms_py = self.setup_directories( + tmp_path, monkeypatch=None, pytester=pytester + ) + com_root_1 = tmp_path / "src/dist1/com" + com_root_2 = tmp_path / "src/dist2/com" + + # Because the namespace package is not setup correctly, we cannot resolve it as a namespace package. pkg_root, module_name = resolve_pkg_root_and_module_name( models_py, consider_namespace_packages=True ) @@ -1235,3 +1327,107 @@ def test_incorrect_namespace_package( tmp_path / "src/dist1/com/company", "app.core.models", ) + + # Insert our custom importer, which will recognize the "com" directory as a namespace package. + new_meta_path = [CustomImporter(), *sys.meta_path] + monkeypatch.setattr(sys, "meta_path", new_meta_path) + + # Now we should be able to resolve the path as namespace package. + pkg_root, module_name = resolve_pkg_root_and_module_name( + models_py, consider_namespace_packages=True + ) + assert (pkg_root, module_name) == ( + tmp_path / "src/dist1", + "com.company.app.core.models", + ) + + @pytest.mark.parametrize("insert", [True, False]) + def test_full_ns_packages_without_init_files( + self, pytester: Pytester, tmp_path: Path, monkeypatch: MonkeyPatch, insert: bool + ) -> None: + (tmp_path / "src/dist1/ns/b/app/bar/test").mkdir(parents=True) + (tmp_path / "src/dist1/ns/b/app/bar/m.py").touch() + + if insert: + # The presence of this __init__.py is not a problem, ns.b.app is still part of the namespace package. + (tmp_path / "src/dist1/ns/b/app/__init__.py").touch() + + (tmp_path / "src/dist2/ns/a/core/foo/test").mkdir(parents=True) + (tmp_path / "src/dist2/ns/a/core/foo/m.py").touch() + + # Validate the namespace package by importing it in a Python subprocess. + r = validate_namespace_package( + pytester, + [tmp_path / "src/dist1", tmp_path / "src/dist2"], + ["ns.b.app.bar.m", "ns.a.core.foo.m"], + ) + assert r.ret == 0 + monkeypatch.syspath_prepend(tmp_path / "src/dist1") + monkeypatch.syspath_prepend(tmp_path / "src/dist2") + + assert resolve_pkg_root_and_module_name( + tmp_path / "src/dist1/ns/b/app/bar/m.py", consider_namespace_packages=True + ) == (tmp_path / "src/dist1", "ns.b.app.bar.m") + assert resolve_pkg_root_and_module_name( + tmp_path / "src/dist2/ns/a/core/foo/m.py", consider_namespace_packages=True + ) == (tmp_path / "src/dist2", "ns.a.core.foo.m") + + +def test_is_importable(pytester: Pytester) -> None: + pytester.syspathinsert() + + path = pytester.path / "bar/foo.py" + path.parent.mkdir() + path.touch() + assert is_importable("bar.foo", path) is True + + # Ensure that the module that can be imported points to the path we expect. + path = pytester.path / "some/other/path/bar/foo.py" + path.mkdir(parents=True, exist_ok=True) + assert is_importable("bar.foo", path) is False + + # Paths containing "." cannot be imported. + path = pytester.path / "bar.x/__init__.py" + path.parent.mkdir() + path.touch() + assert is_importable("bar.x", path) is False + + # Pass starting with "." denote relative imports and cannot be checked using is_importable. + path = pytester.path / ".bar.x/__init__.py" + path.parent.mkdir() + path.touch() + assert is_importable(".bar.x", path) is False + + +def test_compute_module_name(tmp_path: Path) -> None: + assert compute_module_name(tmp_path, tmp_path) is None + assert compute_module_name(Path(), Path()) is None + + assert compute_module_name(tmp_path, tmp_path / "mod.py") == "mod" + assert compute_module_name(tmp_path, tmp_path / "src/app/bar") == "src.app.bar" + assert compute_module_name(tmp_path, tmp_path / "src/app/bar.py") == "src.app.bar" + assert ( + compute_module_name(tmp_path, tmp_path / "src/app/bar/__init__.py") + == "src.app.bar" + ) + + +def validate_namespace_package( + pytester: Pytester, paths: Sequence[Path], modules: Sequence[str] +) -> RunResult: + """ + Validate that a Python namespace package is set up correctly. + + In a sub interpreter, add 'paths' to sys.path and attempt to import the given modules. + + In this module many tests configure a set of files as a namespace package, this function + is used as sanity check that our files are configured correctly from the point of view of Python. + """ + lines = [ + "import sys", + # Configure sys.path. + *[f"sys.path.append(r{str(x)!r})" for x in paths], + # Imports. + *[f"import {x}" for x in modules], + ] + return pytester.runpython_c("\n".join(lines))