From 485be41041b8bb50bec72c5c321e9433a4df57f0 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Tue, 15 Oct 2024 16:25:20 -0700 Subject: [PATCH 1/5] Make is_sub_path faster See #17948 Haven't run the benchmark yet, but profile indicates that this could save 0.5s on both incremental and non-incremental builds in environments with long search path --- mypy/util.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mypy/util.py b/mypy/util.py index 74bf08c9d6de..e29de456a98a 100644 --- a/mypy/util.py +++ b/mypy/util.py @@ -5,7 +5,6 @@ import hashlib import io import os -import pathlib import re import shutil import sys @@ -411,9 +410,11 @@ def replace_object_state( pass -def is_sub_path(path1: str, path2: str) -> bool: - """Given two paths, return if path1 is a sub-path of path2.""" - return pathlib.Path(path2) in pathlib.Path(path1).parents +def is_sub_path(path: str, dir: str) -> bool: + """Given two paths, return if path is a sub-path of dir.""" + if not dir.endswith(os.sep): + dir += os.sep + return path.startswith(dir) if sys.platform == "linux" or sys.platform == "darwin": From a9602bef28ad0c87ae958ef708515927ff6f7282 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Tue, 15 Oct 2024 18:07:51 -0700 Subject: [PATCH 2/5] debug --- mypy/util.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mypy/util.py b/mypy/util.py index e29de456a98a..adef5bf08443 100644 --- a/mypy/util.py +++ b/mypy/util.py @@ -414,7 +414,14 @@ def is_sub_path(path: str, dir: str) -> bool: """Given two paths, return if path is a sub-path of dir.""" if not dir.endswith(os.sep): dir += os.sep - return path.startswith(dir) + ret = path.startswith(dir) + + import pathlib + + if ret != (pathlib.Path(dir) in pathlib.Path(path).parents): + raise AssertionError(f"mismatch for {path!r} and {dir!r}") + + return ret if sys.platform == "linux" or sys.platform == "darwin": From 4fd0e86ee21ba366847886f31ee9dccb64eb1830 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Tue, 15 Oct 2024 18:33:52 -0700 Subject: [PATCH 3/5] Revert "debug" This reverts commit a9602bef28ad0c87ae958ef708515927ff6f7282. --- mypy/util.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/mypy/util.py b/mypy/util.py index adef5bf08443..e29de456a98a 100644 --- a/mypy/util.py +++ b/mypy/util.py @@ -414,14 +414,7 @@ def is_sub_path(path: str, dir: str) -> bool: """Given two paths, return if path is a sub-path of dir.""" if not dir.endswith(os.sep): dir += os.sep - ret = path.startswith(dir) - - import pathlib - - if ret != (pathlib.Path(dir) in pathlib.Path(path).parents): - raise AssertionError(f"mismatch for {path!r} and {dir!r}") - - return ret + return path.startswith(dir) if sys.platform == "linux" or sys.platform == "darwin": From 8cbe3bf0b9d4c68e79dc88a19173fcfb22b43863 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Tue, 15 Oct 2024 18:37:59 -0700 Subject: [PATCH 4/5] normcase --- mypy/modulefinder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mypy/modulefinder.py b/mypy/modulefinder.py index 59a71025f71e..67ca1c9dc89e 100644 --- a/mypy/modulefinder.py +++ b/mypy/modulefinder.py @@ -673,6 +673,7 @@ def default_lib_path( path: list[str] = [] if custom_typeshed_dir: + custom_typeshed_dir = os.path.normcase(custom_typeshed_dir) typeshed_dir = os.path.join(custom_typeshed_dir, "stdlib") mypy_extensions_dir = os.path.join(custom_typeshed_dir, "stubs", "mypy-extensions") versions_file = os.path.join(typeshed_dir, "VERSIONS") From 808087a7aa7dad9a3cc86e49cda79a3648b09134 Mon Sep 17 00:00:00 2001 From: hauntsaninja Date: Wed, 16 Oct 2024 11:36:53 -0700 Subject: [PATCH 5/5] document and guarantee path requirements --- mypy/build.py | 9 ++++----- mypy/modulefinder.py | 10 +++++++--- mypy/util.py | 19 +++++++++++++++++-- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 043b52f0a241..3c68519664cb 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -59,7 +59,7 @@ get_mypy_comments, hash_digest, is_stub_package_file, - is_sub_path, + is_sub_path_normabs, is_typeshed_file, module_prefix, read_py_file, @@ -3544,10 +3544,9 @@ def is_silent_import_module(manager: BuildManager, path: str) -> bool: if manager.options.no_silence_site_packages: return False # Silence errors in site-package dirs and typeshed - return any( - is_sub_path(path, dir) - for dir in manager.search_paths.package_path + manager.search_paths.typeshed_path - ) + if any(is_sub_path_normabs(path, dir) for dir in manager.search_paths.package_path): + return True + return any(is_sub_path_normabs(path, dir) for dir in manager.search_paths.typeshed_path) def write_undocumented_ref_info( diff --git a/mypy/modulefinder.py b/mypy/modulefinder.py index 67ca1c9dc89e..0cb4dc4b3cce 100644 --- a/mypy/modulefinder.py +++ b/mypy/modulefinder.py @@ -669,11 +669,13 @@ def mypy_path() -> list[str]: def default_lib_path( data_dir: str, pyversion: tuple[int, int], custom_typeshed_dir: str | None ) -> list[str]: - """Return default standard library search paths.""" + """Return default standard library search paths. Guaranteed to be normalised.""" + + data_dir = os.path.abspath(data_dir) path: list[str] = [] if custom_typeshed_dir: - custom_typeshed_dir = os.path.normcase(custom_typeshed_dir) + custom_typeshed_dir = os.path.abspath(custom_typeshed_dir) typeshed_dir = os.path.join(custom_typeshed_dir, "stdlib") mypy_extensions_dir = os.path.join(custom_typeshed_dir, "stubs", "mypy-extensions") versions_file = os.path.join(typeshed_dir, "VERSIONS") @@ -713,7 +715,7 @@ def default_lib_path( @functools.lru_cache(maxsize=None) def get_search_dirs(python_executable: str | None) -> tuple[list[str], list[str]]: - """Find package directories for given python. + """Find package directories for given python. Guaranteed to return absolute paths. This runs a subprocess call, which generates a list of the directories in sys.path. To avoid repeatedly calling a subprocess (which can be slow!) we @@ -775,6 +777,7 @@ def compute_search_paths( root_dir = os.getenv("MYPY_TEST_PREFIX", None) if not root_dir: root_dir = os.path.dirname(os.path.dirname(__file__)) + root_dir = os.path.abspath(root_dir) lib_path.appendleft(os.path.join(root_dir, "test-data", "unit", "lib-stub")) # alt_lib_path is used by some tests to bypass the normal lib_path mechanics. # If we don't have one, grab directories of source files. @@ -831,6 +834,7 @@ def compute_search_paths( return SearchPaths( python_path=tuple(reversed(python_path)), mypy_path=tuple(mypypath), + # package_path and typeshed_path must be normalised and absolute via os.path.abspath package_path=tuple(sys_path + site_packages), typeshed_path=tuple(lib_path), ) diff --git a/mypy/util.py b/mypy/util.py index e29de456a98a..110fc543dea4 100644 --- a/mypy/util.py +++ b/mypy/util.py @@ -410,8 +410,23 @@ def replace_object_state( pass -def is_sub_path(path: str, dir: str) -> bool: - """Given two paths, return if path is a sub-path of dir.""" +def is_sub_path_normabs(path: str, dir: str) -> bool: + """Given two paths, return if path is a sub-path of dir. + + Moral equivalent of: Path(dir) in Path(path).parents + + Similar to the pathlib version: + - Treats paths case-sensitively + - Does not fully handle unnormalised paths (e.g. paths with "..") + - Does not handle a mix of absolute and relative paths + Unlike the pathlib version: + - Fast + - On Windows, assumes input has been slash normalised + - Handles even fewer unnormalised paths (e.g. paths with "." and "//") + + As a result, callers should ensure that inputs have had os.path.abspath called on them + (note that os.path.abspath will normalise) + """ if not dir.endswith(os.sep): dir += os.sep return path.startswith(dir)