Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mypy: allow explicit specification of package roots #9632

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 69 additions & 49 deletions mypy/find_sources.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
"""Routines for finding the sources that mypy will check"""

import os.path
import functools
import os

from typing import List, Sequence, Set, Tuple, Optional, Dict
from typing import List, Sequence, Set, Tuple, Optional
from typing_extensions import Final

from mypy.modulefinder import BuildSource, PYTHON_EXTENSIONS
from mypy.modulefinder import BuildSource, PYTHON_EXTENSIONS, mypy_path
from mypy.fscache import FileSystemCache
from mypy.options import Options
from mypy.util import normalise_package_root

PY_EXTENSIONS = tuple(PYTHON_EXTENSIONS) # type: Final

Expand All @@ -24,7 +26,7 @@ def create_source_list(paths: Sequence[str], options: Options,
Raises InvalidSourceList on errors.
"""
fscache = fscache or FileSystemCache()
finder = SourceFinder(fscache)
finder = SourceFinder(fscache, options)

sources = []
for path in paths:
Expand All @@ -34,7 +36,7 @@ def create_source_list(paths: Sequence[str], options: Options,
name, base_dir = finder.crawl_up(path)
sources.append(BuildSource(path, name, None, base_dir))
elif fscache.isdir(path):
sub_sources = finder.find_sources_in_dir(path, explicit_package_roots=None)
sub_sources = finder.find_sources_in_dir(path)
if not sub_sources and not allow_empty_dir:
raise InvalidSourceList(
"There are no .py[i] files in directory '{}'".format(path)
Expand All @@ -58,38 +60,51 @@ def keyfunc(name: str) -> Tuple[int, str]:
return (-1, name)


def get_explicit_package_roots(options: Options) -> Optional[List[str]]:
if not options.package_root:
return None
roots = options.package_root + mypy_path() + options.mypy_path + [os.getcwd()]
return [normalise_package_root(root) for root in roots]


class SourceFinder:
def __init__(self, fscache: FileSystemCache) -> None:
def __init__(self, fscache: FileSystemCache, options: Options) -> None:
self.fscache = fscache
# A cache for package names, mapping from directory path to module id and base dir
self.package_cache = {} # type: Dict[str, Tuple[str, str]]
self.explicit_package_roots = get_explicit_package_roots(options)
self.namespace_packages = options.namespace_packages

def find_sources_in_dir(
self, path: str, explicit_package_roots: Optional[List[str]]
) -> List[BuildSource]:
if explicit_package_roots is None:
mod_prefix, root_dir = self.crawl_up_dir(path)
else:
mod_prefix = os.path.basename(path)
root_dir = os.path.dirname(path) or "."
def is_package_root(self, path: str) -> bool:
assert self.explicit_package_roots
return normalise_package_root(path) in self.explicit_package_roots

def find_sources_in_dir(self, path: str) -> List[BuildSource]:
mod_prefix, root_dir = self.crawl_up_dir(path)
if mod_prefix:
mod_prefix += "."
return self.find_sources_in_dir_helper(path, mod_prefix, root_dir, explicit_package_roots)
return self.find_sources_in_dir_helper(path, mod_prefix, root_dir)

def find_sources_in_dir_helper(
self, dir_path: str, mod_prefix: str, root_dir: str,
explicit_package_roots: Optional[List[str]]
self, dir_path: str, mod_prefix: str, root_dir: str
) -> List[BuildSource]:
assert not mod_prefix or mod_prefix.endswith(".")

init_file = self.get_init_file(dir_path)

is_package_root = False
# If the current directory is an explicit package root, explore it as such.
if self.explicit_package_roots is not None and self.is_package_root(dir_path):
is_package_root = True
# Alternatively, if we aren't given explicit package roots and we don't have an __init__
# file, recursively explore this directory as a new package root.
if (
(explicit_package_roots is not None and dir_path in explicit_package_roots)
or (explicit_package_roots is None and init_file is None)
):
# file, *conditionally* recursively explore this directory as a new package root...
elif self.explicit_package_roots is None and init_file is None:
# ...if namespace packages is False, we always consider this a new package root
# ...if namespace packages is True, we consider this a new package root only if we're
# not already exploring a package. This allows us to have reasonable behaviour in the
# face of missing __init__ files, without having to specify explicit package roots.
if not self.namespace_packages or mod_prefix == "":
is_package_root = True

if is_package_root:
mod_prefix = ""
root_dir = dir_path

Expand All @@ -109,7 +124,7 @@ def find_sources_in_dir_helper(

if self.fscache.isdir(path):
sub_sources = self.find_sources_in_dir_helper(
path, mod_prefix + name + '.', root_dir, explicit_package_roots
path, mod_prefix + name + '.', root_dir
)
if sub_sources:
seen.add(name)
Expand All @@ -126,10 +141,12 @@ def find_sources_in_dir_helper(
return sources

def crawl_up(self, path: str) -> Tuple[str, str]:
"""Given a .py[i] filename, return module and base directory
"""Given a .py[i] filename, return module and base directory.

If we are given explicit package roots, we crawl up until we find one (or run out of
path components).

We crawl up the path until we find a directory without
__init__.py[i], or until we run out of path components.
Otherwise, we crawl up the path until we find a directory without __init__.py[i].
"""
parent, filename = os.path.split(path)
module_name = strip_py(filename) or os.path.basename(filename)
Expand All @@ -141,28 +158,33 @@ def crawl_up(self, path: str) -> Tuple[str, str]:

return module, base_dir

# Add a cache in case many files are passed to mypy
@functools.lru_cache()
def crawl_up_dir(self, dir: str) -> Tuple[str, str]:
"""Given a directory name, return the corresponding module name and base directory

Use package_cache to cache results.
"""
if dir in self.package_cache:
return self.package_cache[dir]
"""Given a directory name, return the corresponding module name and base directory."""
if self.explicit_package_roots is not None:
if self.is_package_root(dir):
return "", dir

parent_dir, base = os.path.split(dir)
if not dir or not self.get_init_file(dir) or not base:
module = ''
base_dir = dir or '.'
else:
# Ensure that base is a valid python module name
if base.endswith('-stubs'):
base = base[:-6] # PEP-561 stub-only directory
if not base.isidentifier():
raise InvalidSourceList('{} is not a valid Python package name'.format(base))
parent_module, base_dir = self.crawl_up_dir(parent_dir)
module = module_join(parent_module, base)

self.package_cache[dir] = module, base_dir
if (
not dir or not base
# In the absence of explicit package roots, a lack of __init__.py means we've reached
# an (implicit) package root
or (self.explicit_package_roots is None and not self.get_init_file(dir))
):
module = ""
base_dir = dir or "."
return module, base_dir

# Ensure that base is a valid python module name
if base.endswith('-stubs'):
base = base[:-6] # PEP-561 stub-only directory
if not base.isidentifier():
raise InvalidSourceList('{} is not a valid Python package name'.format(base))

parent_module, base_dir = self.crawl_up_dir(parent_dir)
module = module_join(parent_module, base)
return module, base_dir

def get_init_file(self, dir: str) -> Optional[str]:
Expand All @@ -176,8 +198,6 @@ def get_init_file(self, dir: str) -> Optional[str]:
f = os.path.join(dir, '__init__' + ext)
if self.fscache.isfile(f):
return f
if ext == '.py' and self.fscache.init_under_package_root(f):
return f
return None


Expand Down
113 changes: 8 additions & 105 deletions mypy/fscache.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,14 @@

import os
import stat
from typing import Dict, List, Set
from typing import Dict, List
from mypy.util import hash_digest


class FileSystemCache:
def __init__(self) -> None:
# The package root is not flushed with the caches.
# It is set by set_package_root() below.
self.package_root = [] # type: List[str]
self.flush()

def set_package_root(self, package_root: List[str]) -> None:
self.package_root = package_root

def flush(self) -> None:
"""Start another transaction and empty all caches."""
self.stat_cache = {} # type: Dict[str, os.stat_result]
Expand All @@ -54,7 +48,6 @@ def flush(self) -> None:
self.read_cache = {} # type: Dict[str, bytes]
self.read_error_cache = {} # type: Dict[str, Exception]
self.hash_cache = {} # type: Dict[str, str]
self.fake_package_cache = set() # type: Set[str]

def stat(self, path: str) -> os.stat_result:
if path in self.stat_cache:
Expand All @@ -64,100 +57,17 @@ def stat(self, path: str) -> os.stat_result:
try:
st = os.stat(path)
except OSError as err:
if self.init_under_package_root(path):
try:
return self._fake_init(path)
except OSError:
pass
# Take a copy to get rid of associated traceback and frame objects.
# Just assigning to __traceback__ doesn't free them.
self.stat_error_cache[path] = copy_os_error(err)
raise err
self.stat_cache[path] = st
return st

def init_under_package_root(self, path: str) -> bool:
"""Is this path an __init__.py under a package root?

This is used to detect packages that don't contain __init__.py
files, which is needed to support Bazel. The function should
only be called for non-existing files.

It will return True if it refers to a __init__.py file that
Bazel would create, so that at runtime Python would think the
directory containing it is a package. For this to work you
must pass one or more package roots using the --package-root
flag.

As an exceptional case, any directory that is a package root
itself will not be considered to contain a __init__.py file.
This is different from the rules Bazel itself applies, but is
necessary for mypy to properly distinguish packages from other
directories.

See https://docs.bazel.build/versions/master/be/python.html,
where this behavior is described under legacy_create_init.
"""
if not self.package_root:
return False
dirname, basename = os.path.split(path)
if basename != '__init__.py':
return False
try:
st = self.stat(dirname)
except OSError:
return False
else:
if not stat.S_ISDIR(st.st_mode):
return False
ok = False
drive, path = os.path.splitdrive(path) # Ignore Windows drive name
path = os.path.normpath(path)
for root in self.package_root:
if path.startswith(root):
if path == root + basename:
# A package root itself is never a package.
ok = False
break
else:
ok = True
return ok

def _fake_init(self, path: str) -> os.stat_result:
"""Prime the cache with a fake __init__.py file.

This makes code that looks for path believe an empty file by
that name exists. Should only be called after
init_under_package_root() returns True.
"""
dirname, basename = os.path.split(path)
assert basename == '__init__.py', path
assert not os.path.exists(path), path # Not cached!
dirname = os.path.normpath(dirname)
st = self.stat(dirname) # May raise OSError
# Get stat result as a sequence so we can modify it.
# (Alas, typeshed's os.stat_result is not a sequence yet.)
tpl = tuple(st) # type: ignore[arg-type, var-annotated]
seq = list(tpl) # type: List[float]
seq[stat.ST_MODE] = stat.S_IFREG | 0o444
seq[stat.ST_INO] = 1
seq[stat.ST_NLINK] = 1
seq[stat.ST_SIZE] = 0
tpl = tuple(seq)
st = os.stat_result(tpl)
self.stat_cache[path] = st
# Make listdir() and read() also pretend this file exists.
self.fake_package_cache.add(dirname)
return st

def listdir(self, path: str) -> List[str]:
path = os.path.normpath(path)
if path in self.listdir_cache:
res = self.listdir_cache[path]
# Check the fake cache.
if path in self.fake_package_cache and '__init__.py' not in res:
res.append('__init__.py') # Updates the result as well as the cache
return res
return self.listdir_cache[path]
if path in self.listdir_error_cache:
raise copy_os_error(self.listdir_error_cache[path])
try:
Expand All @@ -167,9 +77,6 @@ def listdir(self, path: str) -> List[str]:
self.listdir_error_cache[path] = copy_os_error(err)
raise err
self.listdir_cache[path] = results
# Check the fake cache.
if path in self.fake_package_cache and '__init__.py' not in results:
results.append('__init__.py')
return results

def isfile(self, path: str) -> bool:
Expand Down Expand Up @@ -245,16 +152,12 @@ def read(self, path: str) -> bytes:

dirname, basename = os.path.split(path)
dirname = os.path.normpath(dirname)
# Check the fake cache.
if basename == '__init__.py' and dirname in self.fake_package_cache:
data = b''
else:
try:
with open(path, 'rb') as f:
data = f.read()
except OSError as err:
self.read_error_cache[path] = err
raise
try:
with open(path, 'rb') as f:
data = f.read()
except OSError as err:
self.read_error_cache[path] = err
raise

self.read_cache[path] = data
self.hash_cache[path] = hash_digest(data)
Expand Down
Loading