Skip to content

Commit

Permalink
More improvements to following imports in mypy daemon (#8616)
Browse files Browse the repository at this point in the history
This includes a bunch of changes to following imports:

* Fix bug with cached modules without ASTs by loading the AST as needed
* Improve docstrings and simplify code
* Rename `follow_imports`
* Merge `seen` and `sources_set`
* Address some TODO items

This may be easier to review by looking at individual commits.

I think that following imports is ready to be enabled after this has been
merged. We can always revert the change before the next release if we
we encounter major issues.

Major things I know about that still don't work (I'll try to address at least the 
first one before the next release):

* `-m` and `-p` don't work correctly with dmypy (I think that happens also
  in other modes, but might be worse when following imports)
* Suppressed modules are incorrect with namespace modules (happens
  also in other modes)
* File events are not supported (but these are undocumented, so it's
  not critical?)
* Performance with a huge number of files is unknown (other import modes
  have better performance and can be used as fallbacks if this is a problem)

Work towards #5870.

Continues progress made in #8590.
  • Loading branch information
JukkaL authored Apr 3, 2020
1 parent f4351ba commit 20bc0b4
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 50 deletions.
98 changes: 55 additions & 43 deletions mypy/dmypy_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,25 +534,23 @@ def fine_grained_increment_follow_imports(self, sources: List[BuildSource]) -> L

orig_modules = list(graph.keys())

# TODO: Are the differences from fine_grained_increment(), necessary?
self.update_sources(sources)
changed_paths = self.fswatcher.find_changed()
manager.search_paths = compute_search_paths(sources, manager.options, manager.data_dir)

t1 = time.time()
manager.log("fine-grained increment: find_changed: {:.3f}s".format(t1 - t0))

sources_set = {source.module for source in sources}
seen = {source.module for source in sources}

# Find changed modules reachable from roots (or in roots) already in graph.
seen = set() # type: Set[str]
changed, removed, new_files = self.follow_imports(
sources, graph, seen, changed_paths, sources_set
changed, new_files = self.find_reachable_changed_modules(
sources, graph, seen, changed_paths
)
sources.extend(new_files)

# Process changes directly reachable from roots.
messages = fine_grained_manager.update(changed, removed)
messages = fine_grained_manager.update(changed, [])

# Follow deps from changed modules (still within graph).
worklist = changed[:]
Expand All @@ -561,31 +559,31 @@ def fine_grained_increment_follow_imports(self, sources: List[BuildSource]) -> L
if module[0] not in graph:
continue
sources2 = self.direct_imports(module, graph)
changed, removed, new_files = self.follow_imports(
sources2, graph, seen, changed_paths, sources_set
changed, new_files = self.find_reachable_changed_modules(
sources2, graph, seen, changed_paths
)
sources.extend(new_files)
self.update_sources(new_files)
messages = fine_grained_manager.update(changed, removed)
# TODO: Removed?
messages = fine_grained_manager.update(changed, [])
worklist.extend(changed)

t2 = time.time()

for module_id, state in graph.items():
refresh_suppressed_submodules(module_id, state.path, fine_grained_manager.deps, graph,
self.fscache)
def refresh_file(module: str, path: str) -> List[str]:
return fine_grained_manager.update([(module, path)], [])

for module_id, state in list(graph.items()):
new_messages = refresh_suppressed_submodules(
module_id, state.path, fine_grained_manager.deps, graph, self.fscache, refresh_file
)
if new_messages is not None:
messages = new_messages

t3 = time.time()

# There may be new files that became available, currently treated as
# suppressed imports. Process them.
seen_suppressed = set() # type: Set[str]
while True:
# TODO: Merge seen and seen_suppressed?
new_unsuppressed, seen_suppressed = self.find_added_suppressed(
graph, seen_suppressed, manager.search_paths
)
new_unsuppressed = self.find_added_suppressed(graph, seen, manager.search_paths)
if not new_unsuppressed:
break
new_files = [BuildSource(mod[1], mod[0]) for mod in new_unsuppressed]
Expand All @@ -594,8 +592,15 @@ def fine_grained_increment_follow_imports(self, sources: List[BuildSource]) -> L
messages = fine_grained_manager.update(new_unsuppressed, [])

for module_id, path in new_unsuppressed:
refresh_suppressed_submodules(module_id, path, fine_grained_manager.deps, graph,
self.fscache)
new_messages = refresh_suppressed_submodules(
module_id, path,
fine_grained_manager.deps,
graph,
self.fscache,
refresh_file
)
if new_messages is not None:
messages = new_messages

t4 = time.time()

Expand All @@ -604,7 +609,7 @@ def fine_grained_increment_follow_imports(self, sources: List[BuildSource]) -> L
for module_id in orig_modules:
if module_id not in graph:
continue
if module_id not in seen and module_id not in seen_suppressed:
if module_id not in seen:
module_path = graph[module_id].path
assert module_path is not None
to_delete.append((module_id, module_path))
Expand All @@ -631,33 +636,35 @@ def fine_grained_increment_follow_imports(self, sources: List[BuildSource]) -> L

return messages

def follow_imports(self,
sources: List[BuildSource],
graph: mypy.build.Graph,
seen: Set[str],
changed_paths: AbstractSet[str],
sources_set: Set[str]) -> Tuple[List[Tuple[str, str]],
List[Tuple[str, str]],
List[BuildSource]]:
"""Follow imports within graph from given sources.
def find_reachable_changed_modules(
self,
roots: List[BuildSource],
graph: mypy.build.Graph,
seen: Set[str],
changed_paths: AbstractSet[str]) -> Tuple[List[Tuple[str, str]],
List[BuildSource]]:
"""Follow imports within graph from given sources until hitting changed modules.
If we find a changed module, we can't continue following imports as the imports
may have changed.
Args:
sources: roots of modules to search
roots: modules where to start search from
graph: module graph to use for the search
seen: modules we've seen before that won't be visited (mutated here!)
seen: modules we've seen before that won't be visited (mutated here!!)
changed_paths: which paths have changed (stop search here and return any found)
sources_set: set of sources (TODO: relationship with seen)
Return (reachable changed modules, removed modules, updated file list).
Return (encountered reachable changed modules,
unchanged files not in sources_set traversed).
"""
changed = []
new_files = []
worklist = sources[:]
worklist = roots[:]
seen.update(source.module for source in worklist)
while worklist:
nxt = worklist.pop()
if nxt.module not in sources_set:
sources_set.add(nxt.module)
if nxt.module not in seen:
seen.add(nxt.module)
new_files.append(nxt)
if nxt.path in changed_paths:
assert nxt.path is not None # TODO
Expand All @@ -669,7 +676,7 @@ def follow_imports(self,
seen.add(dep)
worklist.append(BuildSource(graph[dep].path,
graph[dep].id))
return changed, [], new_files
return changed, new_files

def direct_imports(self,
module: Tuple[str, str],
Expand All @@ -682,8 +689,14 @@ def direct_imports(self,
def find_added_suppressed(self,
graph: mypy.build.Graph,
seen: Set[str],
search_paths: SearchPaths) -> Tuple[List[Tuple[str, str]], Set[str]]:
"""Find suppressed modules that have been added (and not included in seen)."""
search_paths: SearchPaths) -> List[Tuple[str, str]]:
"""Find suppressed modules that have been added (and not included in seen).
Args:
seen: reachable modules we've seen before (mutated here!!)
Return suppressed, added modules.
"""
all_suppressed = set()
for module, state in graph.items():
all_suppressed |= state.suppressed_set
Expand All @@ -693,7 +706,6 @@ def find_added_suppressed(self,
all_suppressed = {module for module in all_suppressed if module not in graph}

# TODO: Namespace packages
# TODO: Handle seen?

finder = FindModuleCache(search_paths, self.fscache, self.options)

Expand All @@ -705,7 +717,7 @@ def find_added_suppressed(self,
found.append((module, result))
seen.add(module)

return found, seen
return found

def increment_output(self,
messages: List[str],
Expand Down
31 changes: 24 additions & 7 deletions mypy/server/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@
import sys
import time
from typing import (
Dict, List, Set, Tuple, Union, Optional, NamedTuple, Sequence
Dict, List, Set, Tuple, Union, Optional, NamedTuple, Sequence, Callable
)
from typing_extensions import Final

Expand Down Expand Up @@ -1127,7 +1127,8 @@ def refresh_suppressed_submodules(
path: Optional[str],
deps: Dict[str, Set[str]],
graph: Graph,
fscache: FileSystemCache) -> None:
fscache: FileSystemCache,
refresh_file: Callable[[str, str], List[str]]) -> Optional[List[str]]:
"""Look for submodules that are now suppressed in target package.
If a submodule a.b gets added, we need to mark it as suppressed
Expand All @@ -1139,10 +1140,15 @@ def refresh_suppressed_submodules(
Args:
module: target package in which to look for submodules
path: path of the module
refresh_file: function that reads the AST of a module (returns error messages)
Return a list of errors from refresh_file() if it was called. If the
return value is None, we didn't call refresh_file().
"""
messages = None
if path is None or not path.endswith(INIT_SUFFIXES):
# Only packages have submodules.
return
return None
# Find any submodules present in the directory.
pkgdir = os.path.dirname(path)
for fnam in fscache.listdir(pkgdir):
Expand All @@ -1153,22 +1159,33 @@ def refresh_suppressed_submodules(
shortname = fnam.split('.')[0]
submodule = module + '.' + shortname
trigger = make_trigger(submodule)

# We may be missing the required fine-grained deps.
ensure_deps_loaded(module, deps, graph)

if trigger in deps:
for dep in deps[trigger]:
# TODO: <...> deps, etc.
# We can ignore <...> deps since a submodule can't trigger any.
state = graph.get(dep)
if not state:
# Maybe it's a non-top-level target. We only care about the module.
dep_module = module_prefix(graph, dep)
if dep_module is not None:
state = graph.get(dep_module)
if state:
# Is the file may missing an AST in case it's read from cache?
if state.tree is None:
# Create AST for the file. This may produce some new errors
# that we need to propagate.
assert state.path is not None
messages = refresh_file(state.id, state.path)
tree = state.tree
assert tree # TODO: What if doesn't exist?
assert tree # Will be fine, due to refresh_file() above
for imp in tree.imports:
if isinstance(imp, ImportFrom):
if (imp.id == module
and any(name == shortname for name, _ in imp.names)):
# TODO: Only if does not exist already
and any(name == shortname for name, _ in imp.names)
and submodule not in state.suppressed_set):
state.suppressed.append(submodule)
state.suppressed_set.add(submodule)
return messages
19 changes: 19 additions & 0 deletions test-data/unit/fine-grained-follow-imports.test
Original file line number Diff line number Diff line change
Expand Up @@ -700,3 +700,22 @@ import bar
==
src/foo.py:2: error: "str" not callable
src/bar.py:1: error: "int" not callable

[case testFollowImportsNormalSuppressedAffectsCachedFile-only_when_cache]
# flags: --follow-imports=normal
# cmd: mypy main.py

[file main.py]
from p import m # type: ignore
m.f(1)

[file p/__init__.py]

[file p/m.py.2]
# This change will trigger a cached file (main.py) through a supressed
# submodule, resulting in additional errors in main.py.
def f() -> None: pass

[out]
==
main.py:2: error: Too many arguments for "f"

0 comments on commit 20bc0b4

Please sign in to comment.