def get_explicit_package_roots(options: Options) -> Optional[List[str]]:
    """Collect the explicit package roots implied by the options.

    Returns None when ``options.package_root`` is empty, i.e. when explicit
    package roots are not in use. Otherwise returns the normalised form of,
    in this order: ``options.package_root``, the entries returned by
    ``mypy_path()`` (presumably the MYPYPATH environment entries -- confirm
    in mypy.modulefinder), ``options.mypy_path`` and the current working
    directory.
    """
    if options.package_root:
        candidates = options.package_root + mypy_path() + options.mypy_path + [os.getcwd()]
        # Normalise every entry so that later membership tests compare like with like.
        return list(map(normalise_package_root, candidates))
    return None
def is_package_root(self, path: str) -> bool:
    """Tell whether ``path`` is one of the explicit package roots.

    Must only be called when explicit package roots are in use; the
    assertion fails when ``self.explicit_package_roots`` is None or empty.
    ``path`` is normalised before the lookup, so different spellings of the
    same directory compare equal.
    """
    roots = self.explicit_package_roots
    assert roots
    candidate = normalise_package_root(path)
    return candidate in roots
def crawl_up_dir(self, dir: str) -> Tuple[str, str]:
    """Given a directory name, return the corresponding module name and base directory.

    If explicit package roots are in use, the crawl goes up until it reaches
    one of them (or runs out of path components); the presence or absence of
    __init__ files is not consulted. Otherwise the first directory without an
    __init__ file is treated as the (implicit) package root.

    Results are memoised per instance, which helps when many files are passed
    to mypy.

    Args:
        dir: Directory path to resolve; may be empty.

    Returns:
        A ``(module, base_dir)`` tuple. ``module`` is the dotted package name
        of ``dir`` ('' if ``dir`` is itself a root) and ``base_dir`` is the
        directory the package name is relative to.

    Raises:
        InvalidSourceList: If a directory that has to be treated as a package
            does not have a valid Python identifier as its name.
    """
    # Keep the cache on the instance. Decorating the method with
    # functools.lru_cache would store every entry in one class-wide cache keyed
    # on `self`, which keeps finders alive after use and lets unrelated
    # instances evict each other's entries. The dict is created on first use so
    # this method does not rely on __init__ having set it up.
    cache = self.__dict__.setdefault("_crawl_up_dir_cache", {})
    cached = cache.get(dir)
    if cached is not None:
        return cached

    if self.explicit_package_roots is not None and self.is_package_root(dir):
        result = "", dir
    else:
        parent_dir, base = os.path.split(dir)
        if (
            not dir or not base
            # In the absence of explicit package roots, a lack of __init__.py means
            # we've reached an (implicit) package root
            or (self.explicit_package_roots is None and not self.get_init_file(dir))
        ):
            result = "", dir or "."
        else:
            # Ensure that base is a valid python module name
            if base.endswith('-stubs'):
                base = base[:-6]  # PEP-561 stub-only directory
            if not base.isidentifier():
                # Not cached: a failing directory is re-examined on the next call.
                raise InvalidSourceList('{} is not a valid Python package name'.format(base))

            parent_module, base_dir = self.crawl_up_dir(parent_dir)
            result = module_join(parent_module, base), base_dir

    cache[dir] = result
    return result
# Just assigning to __traceback__ doesn't free them. self.stat_error_cache[path] = copy_os_error(err) @@ -76,88 +64,10 @@ def stat(self, path: str) -> os.stat_result: self.stat_cache[path] = st return st - def init_under_package_root(self, path: str) -> bool: - """Is this path an __init__.py under a package root? - - This is used to detect packages that don't contain __init__.py - files, which is needed to support Bazel. The function should - only be called for non-existing files. - - It will return True if it refers to a __init__.py file that - Bazel would create, so that at runtime Python would think the - directory containing it is a package. For this to work you - must pass one or more package roots using the --package-root - flag. - - As an exceptional case, any directory that is a package root - itself will not be considered to contain a __init__.py file. - This is different from the rules Bazel itself applies, but is - necessary for mypy to properly distinguish packages from other - directories. - - See https://docs.bazel.build/versions/master/be/python.html, - where this behavior is described under legacy_create_init. - """ - if not self.package_root: - return False - dirname, basename = os.path.split(path) - if basename != '__init__.py': - return False - try: - st = self.stat(dirname) - except OSError: - return False - else: - if not stat.S_ISDIR(st.st_mode): - return False - ok = False - drive, path = os.path.splitdrive(path) # Ignore Windows drive name - path = os.path.normpath(path) - for root in self.package_root: - if path.startswith(root): - if path == root + basename: - # A package root itself is never a package. - ok = False - break - else: - ok = True - return ok - - def _fake_init(self, path: str) -> os.stat_result: - """Prime the cache with a fake __init__.py file. - - This makes code that looks for path believe an empty file by - that name exists. Should only be called after - init_under_package_root() returns True. 
- """ - dirname, basename = os.path.split(path) - assert basename == '__init__.py', path - assert not os.path.exists(path), path # Not cached! - dirname = os.path.normpath(dirname) - st = self.stat(dirname) # May raise OSError - # Get stat result as a sequence so we can modify it. - # (Alas, typeshed's os.stat_result is not a sequence yet.) - tpl = tuple(st) # type: ignore[arg-type, var-annotated] - seq = list(tpl) # type: List[float] - seq[stat.ST_MODE] = stat.S_IFREG | 0o444 - seq[stat.ST_INO] = 1 - seq[stat.ST_NLINK] = 1 - seq[stat.ST_SIZE] = 0 - tpl = tuple(seq) - st = os.stat_result(tpl) - self.stat_cache[path] = st - # Make listdir() and read() also pretend this file exists. - self.fake_package_cache.add(dirname) - return st - def listdir(self, path: str) -> List[str]: path = os.path.normpath(path) if path in self.listdir_cache: - res = self.listdir_cache[path] - # Check the fake cache. - if path in self.fake_package_cache and '__init__.py' not in res: - res.append('__init__.py') # Updates the result as well as the cache - return res + return self.listdir_cache[path] if path in self.listdir_error_cache: raise copy_os_error(self.listdir_error_cache[path]) try: @@ -167,9 +77,6 @@ def listdir(self, path: str) -> List[str]: self.listdir_error_cache[path] = copy_os_error(err) raise err self.listdir_cache[path] = results - # Check the fake cache. - if path in self.fake_package_cache and '__init__.py' not in results: - results.append('__init__.py') return results def isfile(self, path: str) -> bool: @@ -245,16 +152,12 @@ def read(self, path: str) -> bytes: dirname, basename = os.path.split(path) dirname = os.path.normpath(dirname) - # Check the fake cache. 
- if basename == '__init__.py' and dirname in self.fake_package_cache: - data = b'' - else: - try: - with open(path, 'rb') as f: - data = f.read() - except OSError as err: - self.read_error_cache[path] = err - raise + try: + with open(path, 'rb') as f: + data = f.read() + except OSError as err: + self.read_error_cache[path] = err + raise self.read_cache[path] = data self.hash_cache[path] = hash_digest(data) diff --git a/mypy/main.py b/mypy/main.py index 7de1f57dfece..66d4b97cfb88 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -907,7 +907,7 @@ def set_strict_flags() -> None: # Process --package-root. if options.package_root: - process_package_roots(fscache, parser, options) + process_package_roots(parser, options) # Process --cache-map. if special_opts.cache_map: @@ -957,19 +957,12 @@ def set_strict_flags() -> None: return targets, options -def process_package_roots(fscache: Optional[FileSystemCache], - parser: argparse.ArgumentParser, +def process_package_roots(parser: argparse.ArgumentParser, options: Options) -> None: """Validate and normalize package_root.""" - if fscache is None: - parser.error("--package-root does not work here (no fscache)") - assert fscache is not None # Since mypy doesn't know parser.error() raises. # Do some stuff with drive letters to make Windows happy (esp. tests). current_drive, _ = os.path.splitdrive(os.getcwd()) - dot = os.curdir - dotslash = os.curdir + os.sep dotdotslash = os.pardir + os.sep - trivial_paths = {dot, dotslash} package_root = [] for root in options.package_root: if os.path.isabs(root): @@ -977,19 +970,12 @@ def process_package_roots(fscache: Optional[FileSystemCache], drive, root = os.path.splitdrive(root) if drive and drive != current_drive: parser.error("Package root must be on current drive: %r" % (drive + root)) - # Empty package root is always okay. - if root: - root = os.path.relpath(root) # Normalize the heck out of it. 
def normalise_package_root(root: str) -> str:
    """Return a canonical spelling of a package root, suitable for comparisons.

    The path is made relative to the current working directory with redundant
    separators and up-level references collapsed, and carries no trailing
    separator. The current directory itself is spelled as the empty string.

    Args:
        root: Package root as given by the user; may be empty, relative or
            absolute.

    Returns:
        '' for an empty root or the current directory, otherwise the path
        relative to the current directory (it starts with ``os.pardir`` when
        the root lies outside of it). If no relative path exists (on Windows,
        a path on a different drive), the normalised absolute path is returned.
    """
    # Empty package root is always okay.
    if not root:
        return ''

    try:
        root = os.path.relpath(root)  # Normalize the heck out of it.
    except ValueError:
        # On Windows relpath() raises when `root` is on another drive than the
        # current directory. Such a path cannot be expressed relative to the
        # cwd, so compare it by its normalised absolute form instead of failing.
        return os.path.abspath(root)

    # relpath() returns os.curdir for the current directory and never leaves a
    # trailing separator, so there is nothing further to strip.
    if root == os.curdir:
        return ''
    return root
diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test index 271b7c4f3e68..97633a0ec9c8 100644 --- a/test-data/unit/cmdline.test +++ b/test-data/unit/cmdline.test @@ -945,19 +945,19 @@ emarg/foo.py:1: error: Name 'fail' is not defined emarg/hatch/villip/mankangulisk.py:1: error: Name 'fail' is not defined [case testPackageRootEmpty] -# cmd: mypy --package-root= a/b/c.py main.py +# cmd: mypy --namespace-packages --package-root= a/b/c.py main.py [file a/b/c.py] [file main.py] import a.b.c [case testPackageRootNonEmpty] -# cmd: mypy --package-root=a/ a/b/c.py main.py +# cmd: mypy --namespace-packages --package-root=a/ a/b/c.py main.py [file a/b/c.py] [file main.py] import b.c [case testPackageRootMultiple1] -# cmd: mypy --package-root=. --package-root=a a/b/c.py d.py main.py +# cmd: mypy --namespace-packages --package-root=. --package-root=a a/b/c.py d.py main.py [file a/b/c.py] [file d.py] [file main.py] @@ -965,7 +965,7 @@ import b.c import d [case testPackageRootMultiple2] -# cmd: mypy --package-root=a/ --package-root=./ a/b/c.py d.py main.py +# cmd: mypy --namespace-packages --package-root=a/ --package-root=./ a/b/c.py d.py main.py [file a/b/c.py] [file d.py] [file main.py]