Caching for fine-grained incremental mode #4483

Merged: 8 commits, Feb 7, 2018
21 changes: 20 additions & 1 deletion mypy/build.py
@@ -1131,6 +1131,17 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
if not stat.S_ISREG(st.st_mode):
manager.log('Metadata abandoned for {}: file {} does not exist'.format(id, path))
return None

# When we are using a fine-grained cache, we want our initial
# build() to load all of the cache information and then do a
# fine-grained incremental update to catch anything that has
# changed since the cache was generated. We *don't* want to do a
# coarse-grained incremental rebuild, so we accept the cache
# metadata even if it doesn't match the source file.
if manager.options.use_fine_grained_cache:
manager.log('Using potentially stale metadata for {}'.format(id))
return meta

size = st.st_size
if size != meta.size:
manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path))
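To make the control flow concrete, here is a minimal standalone sketch of the short-circuit this hunk adds (a hypothetical function with simplified checks, not the real `validate_meta`):

```python
# A simplified model of the check above: in fine-grained-cache mode we
# accept possibly-stale metadata wholesale and let the daemon's first
# fine-grained update reconcile whatever changed on disk afterwards.
def validate_meta_sketch(meta, use_fine_grained_cache, st_size, st_mtime):
    if meta is None:
        return None
    if use_fine_grained_cache:
        return meta  # trust the cache; staleness is handled later
    if st_size != meta.size or st_mtime != meta.mtime:
        return None  # coarse-grained mode: a mismatch invalidates the entry
    return meta
```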
@@ -2383,6 +2394,14 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
manager.log("Processing SCC of size %d (%s) as %s" % (size, scc_str, fresh_msg))
process_stale_scc(graph, scc, manager)

# If we are running in fine-grained incremental mode with caching,
# we always process fresh SCCs so that we have all of the symbol
# tables and fine-grained dependencies available.
if manager.options.use_fine_grained_cache:
for prev_scc in fresh_scc_queue:
process_fresh_scc(graph, prev_scc, manager)
fresh_scc_queue = []

sccs_left = len(fresh_scc_queue)
nodes_left = sum(len(scc) for scc in fresh_scc_queue)
manager.add_stats(sccs_left=sccs_left, nodes_left=nodes_left)
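In other words, when the fine-grained cache is in use, the queue of deferred fresh SCCs is drained eagerly, so the `sccs_left`/`nodes_left` stats recorded just above drop to zero in that mode.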
@@ -2569,7 +2588,7 @@ def process_stale_scc(graph: Graph, scc: List[str], manager: BuildManager) -> None:
graph[id].transitive_error = True
for id in stale:
graph[id].finish_passes()
if manager.options.cache_fine_grained:
if manager.options.cache_fine_grained or manager.options.fine_grained_incremental:
graph[id].compute_fine_grained_deps()
graph[id].generate_unused_ignore_notes()
manager.flush_errors(manager.errors.file_messages(graph[id].xpath), False)
31 changes: 27 additions & 4 deletions mypy/dmypy_server.py
@@ -24,7 +24,7 @@
from mypy.dmypy_util import STATUS_FILE, receive
from mypy.gclogger import GcLogger
from mypy.fscache import FileSystemCache
from mypy.fswatcher import FileSystemWatcher
from mypy.fswatcher import FileSystemWatcher, FileData


def daemonize(func: Callable[[], None], log_file: Optional[str] = None) -> int:
@@ -99,13 +99,18 @@ def __init__(self, flags: List[str]) -> None:
sys.exit("dmypy: start/restart should not disable incremental mode")
if options.quick_and_dirty:
sys.exit("dmypy: start/restart should not specify quick_and_dirty mode")
if options.use_fine_grained_cache and not options.fine_grained_incremental:
sys.exit("dmypy: fine-grained cache can only be used in experimental mode")
self.options = options
if os.path.isfile(STATUS_FILE):
os.unlink(STATUS_FILE)
if self.fine_grained:
options.incremental = True
options.show_traceback = True
options.cache_dir = os.devnull
if options.use_fine_grained_cache:
options.cache_fine_grained = True # set this so that cache options match
else:
options.cache_dir = os.devnull

def serve(self) -> None:
"""Serve requests, synchronously (no thread or fork)."""
@@ -263,11 +268,29 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:
manager = result.manager
graph = result.graph
self.fine_grained_manager = mypy.server.update.FineGrainedBuildManager(manager, graph)
status = 1 if messages else 0
self.previous_messages = messages[:]
self.fine_grained_initialized = True
self.previous_sources = sources
self.fscache.flush()

# If we are using the fine-grained cache, build hasn't actually done
# the typechecking on the updated files yet, so run a fine-grained
# update starting from the cached data.
if self.options.use_fine_grained_cache:
# Pull times and hashes out of the saved_cache and stick them into
# the fswatcher, so we pick up the changes.
for meta, mypyfile, type_map in manager.saved_cache.values():
if meta.mtime is None: continue
self.fswatcher.set_file_data(
meta.path,
FileData(st_mtime=float(meta.mtime), st_size=meta.size, md5=meta.hash))

# Run an update
changed = self.find_changed(sources)
if changed:
messages = self.fine_grained_manager.update(changed)

status = 1 if messages else 0
self.previous_messages = messages[:]
return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status}

def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:
3 changes: 3 additions & 0 deletions mypy/fswatcher.py
@@ -36,6 +36,9 @@ def __init__(self, fs: FileSystemCache) -> None:
def paths(self) -> AbstractSet[str]:
return self._paths

def set_file_data(self, path: str, data: FileData) -> None:
self._file_data[path] = data

def add_watched_paths(self, paths: Iterable[str]) -> None:
for path in paths:
if path not in self._paths:
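The new hook exists so the daemon can seed the watcher with *cached* stats rather than fresh ones. Here is a toy illustration of why seeding matters (hypothetical `Watcher`/`Stamp`, not mypy's classes): without it, every file would look new to the first `find_changed()` and force a full re-check.

```python
from typing import Dict, NamedTuple, Set

class Stamp(NamedTuple):
    mtime: float
    size: int

class Watcher:
    """Reports paths whose current stamp differs from the last one seen."""

    def __init__(self) -> None:
        self._seen = {}  # type: Dict[str, Stamp]

    def seed(self, path: str, stamp: Stamp) -> None:
        # Pretend we already observed this state (mirrors set_file_data).
        self._seen[path] = stamp

    def find_changed(self, current: Dict[str, Stamp]) -> Set[str]:
        changed = {p for p, s in current.items() if self._seen.get(p) != s}
        self._seen.update(current)
        return changed

w = Watcher()
w.seed('mod.py', Stamp(100.0, 10))
assert w.find_changed({'mod.py': Stamp(100.0, 10)}) == set()       # unchanged
assert w.find_changed({'mod.py': Stamp(101.0, 12)}) == {'mod.py'}  # edited
```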
2 changes: 2 additions & 0 deletions mypy/main.py
@@ -395,6 +395,8 @@ def add_invertible_flag(flag: str,
if server_options:
parser.add_argument('--experimental', action='store_true', dest='fine_grained_incremental',
help="enable fine-grained incremental mode")
parser.add_argument('--use-fine-grained-cache', action='store_true',
help="use the cache in fine-grained incremental mode")

report_group = parser.add_argument_group(
title='report generation',
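For context, the new flag is meant to pair with `--experimental`: a plausible daemon invocation (hedged; the exact pass-through of server flags may differ across dmypy versions) is `dmypy start -- --experimental --use-fine-grained-cache`, run after an earlier build has written a cache that includes fine-grained dependencies.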
3 changes: 3 additions & 0 deletions mypy/nodes.py
@@ -203,6 +203,8 @@ class MypyFile(SymbolNode):
ignored_lines = None # type: Set[int]
# Is this file represented by a stub file (.pyi)?
is_stub = False
# Is this loaded from the cache and thus missing the actual body of the file?
is_cache_skeleton = False

def __init__(self,
defs: List[Statement],
@@ -249,6 +251,7 @@ def deserialize(cls, data: JsonDict) -> 'MypyFile':
tree.names = SymbolTable.deserialize(data['names'])
tree.is_stub = data['is_stub']
tree.path = data['path']
tree.is_cache_skeleton = True
return tree
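A note on how the flag is consumed: code that needs to walk a module's body is expected to check `is_cache_skeleton` first. The `propagate_changes_using_dependencies` hunk in mypy/server/update.py below does exactly that, queueing skeleton modules for a full reprocess rather than touching their absent bodies.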


1 change: 1 addition & 0 deletions mypy/options.py
@@ -144,6 +144,7 @@ def __init__(self) -> None:
self.skip_version_check = False
self.fine_grained_incremental = False
self.cache_fine_grained = False
self.use_fine_grained_cache = False

# Paths of user plugins
self.plugins = [] # type: List[str]
41 changes: 32 additions & 9 deletions mypy/server/update.py
@@ -170,6 +170,10 @@ def __init__(self,
self.blocking_error = None # type: Optional[Tuple[str, str]]
# Modules that we haven't processed yet but that are known to be stale.
self.stale = [] # type: List[Tuple[str, str]]
# Disable the cache so that load_graph doesn't try going back to disk
# for the cache. This is kind of a hack and it might be better to have
# this directly reflected in load_graph's interface.
self.options.cache_dir = os.devnull
mark_all_meta_as_memory_only(graph, manager)
manager.saved_cache = preserve_full_cache(graph, manager)
self.type_maps = extract_type_maps(graph)
@@ -281,9 +285,10 @@ def update_single(self, module: str, path: str) -> Tuple[List[str],
print('triggered:', sorted(filtered))
self.triggered.extend(triggered | self.previous_targets_with_errors)
collect_dependencies({module: tree}, self.deps, graph)
propagate_changes_using_dependencies(manager, graph, self.deps, triggered,
{module},
self.previous_targets_with_errors)
remaining += propagate_changes_using_dependencies(
manager, graph, self.deps, triggered,
{module},
self.previous_targets_with_errors)

# Preserve state needed for the next update.
self.previous_targets_with_errors = manager.errors.targets()
@@ -318,6 +323,7 @@ def mark_all_meta_as_memory_only(graph: Dict[str, State],
def get_all_dependencies(manager: BuildManager, graph: Dict[str, State],
options: Options) -> Dict[str, Set[str]]:
"""Return the fine-grained dependency map for an entire build."""
# Deps for each module were computed during build() or loaded from the cache.
deps = {} # type: Dict[str, Set[str]]
collect_dependencies(manager.modules, deps, graph)
return deps
@@ -374,7 +380,7 @@ def update_single_isolated(module: str,
sources = get_sources(previous_modules, [(module, path)])
invalidate_stale_cache_entries(manager.saved_cache, [(module, path)])

manager.missing_modules = set()
manager.missing_modules.clear()
try:
graph = load_graph(sources, manager)
except CompileError as err:
@@ -441,6 +447,7 @@ def update_single_isolated(module: str,
# Perform type checking.
state.type_check_first_pass()
state.type_check_second_pass()
state.compute_fine_grained_deps()
state.finish_passes()
# TODO: state.write_cache()?
# TODO: state.mark_as_rechecked()?
@@ -492,7 +499,8 @@ def delete_module(module_id: str,
# TODO: Remove deps for the module (this only affects memory use, not correctness)
assert module_id not in graph
new_graph = graph.copy()
del manager.modules[module_id]
if module_id in manager.modules:
del manager.modules[module_id]
if module_id in manager.saved_cache:
del manager.saved_cache[module_id]
components = module_id.split('.')
@@ -654,7 +662,6 @@ def collect_dependencies(new_modules: Mapping[str, Optional[MypyFile]],
for id, node in new_modules.items():
if node is None:
continue
graph[id].compute_fine_grained_deps()
for trigger, targets in graph[id].fine_grained_deps.items():
deps.setdefault(trigger, set()).update(targets)

@@ -711,9 +718,15 @@ def propagate_changes_using_dependencies(
deps: Dict[str, Set[str]],
triggered: Set[str],
up_to_date_modules: Set[str],
targets_with_errors: Set[str]) -> None:
targets_with_errors: Set[str]) -> List[Tuple[str, str]]:
"""Transitively rechecks targets based on triggers and the dependency map.

Returns a list of (module id, path) tuples representing modules that contain
a target that needs to be reprocessed but that have not been parsed yet."""

# TODO: Multiple type checking passes
num_iter = 0
remaining_modules = []

# Propagate changes until nothing visible has changed during the last
# iteration.
@@ -737,7 +750,13 @@
# TODO: Preserve order (set is not optimal)
for id, nodes in sorted(todo.items(), key=lambda x: x[0]):
assert id not in up_to_date_modules
triggered |= reprocess_nodes(manager, graph, id, nodes, deps)
if manager.modules[id].is_cache_skeleton:
# We have only loaded the cache for this file, not the actual file,
# so we can't access the nodes to reprocess.
# Add it to the queue of files that need to be processed fully.
remaining_modules.append((id, manager.modules[id].path))
else:
triggered |= reprocess_nodes(manager, graph, id, nodes, deps)
# Changes elsewhere may require us to reprocess modules that were
# previously considered up to date. For example, there may be a
# dependency loop that loops back to an originally processed module.
@@ -746,6 +765,8 @@
if DEBUG:
print('triggered:', list(triggered))

return remaining_modules


def find_targets_recursive(
triggers: Set[str],
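The callers (see `update_single` above, which does `remaining += propagate_changes_using_dependencies(...)`) accumulate this return value into a work queue. A minimal sketch of that retry pattern, with a hypothetical `update_module` callable standing in for the manager's per-module update:

```python
# Sketch: cache-skeleton modules can't be reprocessed in place, so they
# are handed back to the caller and re-run as full updates, which parse
# the file and replace the skeleton with a real tree.
def drain_remaining(update_module, initial):
    remaining = list(initial)  # list of (module_id, path) pairs
    while remaining:
        module_id, path = remaining.pop()
        # A full update may itself surface more skeleton-only modules.
        remaining.extend(update_module(module_id, path))
```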
Expand Down Expand Up @@ -993,4 +1014,6 @@ def lookup_target(modules: Dict[str, MypyFile], target: str) -> List[DeferredNod


def extract_type_maps(graph: Graph) -> Dict[str, Dict[Expression, Type]]:
return {id: state.type_map() for id, state in graph.items()}
# This is only used to export information for the testmerge harness.
return {id: state.type_map() for id, state in graph.items()
if state.tree}
1 change: 1 addition & 0 deletions mypy/test/testdmypy.py
@@ -120,6 +120,7 @@ def run_case_once(self, testcase: DataDrivenTestCase, incremental_step: int) -> None:
server_options = [] # type: List[str]
if 'fine-grained' in testcase.file:
server_options.append('--experimental')
options.fine_grained_incremental = True
self.server = dmypy_server.Server(server_options) # TODO: Fix ugly API
self.server.options = options

56 changes: 48 additions & 8 deletions mypy/test/testfinegrained.py
@@ -30,6 +30,7 @@
from mypy.test.testtypegen import ignore_node
from mypy.types import TypeStrVisitor, Type
from mypy.util import short_type
import pytest # type: ignore # no pytest in typeshed


class FineGrainedSuite(DataSuite):
@@ -41,17 +42,44 @@ class FineGrainedSuite(DataSuite):
]
base_path = test_temp_dir
optional_out = True
# Whether to use the fine-grained cache during testing. This is overridden
# by a trivial subclass to produce a suite that uses the cache.
use_cache = False

# Decide whether to skip the test. This could have been structured
# as a filter() classmethod also, but we want the tests reported
# as skipped, not just elided.
def should_skip(self, testcase: DataDrivenTestCase) -> bool:
if self.use_cache:
if testcase.name.endswith("-skip-cache"):
return True
# TODO: In caching mode we currently don't properly support
# starting from cached states with errors in them.
if testcase.output and testcase.output[0] != '==':
return True
else:
if testcase.name.endswith("-skip-nocache"):
return True

return False

def run_case(self, testcase: DataDrivenTestCase) -> None:
if self.should_skip(testcase):
pytest.skip()
return

main_src = '\n'.join(testcase.input)
sources_override = self.parse_sources(main_src)
messages, manager, graph = self.build(main_src, testcase, sources_override)

messages, manager, graph = self.build(main_src, testcase, sources_override,
build_cache=self.use_cache,
enable_cache=self.use_cache)
a = []
if messages:
a.extend(normalize_messages(messages))

fine_grained_manager = FineGrainedBuildManager(manager, graph)
fine_grained_manager = None
if not self.use_cache:
fine_grained_manager = FineGrainedBuildManager(manager, graph)

steps = testcase.find_steps()
all_triggered = []
@@ -70,6 +98,14 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
modules = [(module, path)
for module, path in sources_override
if any(m == module for m, _ in modules)]

# If this is the second iteration and we are using a
# cache, we need to set it up now.
if fine_grained_manager is None:
messages, manager, graph = self.build(main_src, testcase, sources_override,
build_cache=False, enable_cache=True)
fine_grained_manager = FineGrainedBuildManager(manager, graph)

new_messages = fine_grained_manager.update(modules)
all_triggered.append(fine_grained_manager.triggered)
new_messages = normalize_messages(new_messages)
@@ -82,8 +118,8 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:

assert_string_arrays_equal(
testcase.output, a,
'Invalid output ({}, line {})'.format(testcase.file,
testcase.line))
'Invalid output ({}, line {})'.format(
testcase.file, testcase.line))

if testcase.triggered:
assert_string_arrays_equal(
@@ -95,14 +131,18 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
def build(self,
source: str,
testcase: DataDrivenTestCase,
sources_override: Optional[List[Tuple[str, str]]]) -> Tuple[List[str],
BuildManager,
Graph]:
sources_override: Optional[List[Tuple[str, str]]],
build_cache: bool,
enable_cache: bool) -> Tuple[List[str], BuildManager, Graph]:
# This handles things like '# flags: --foo'.
options = parse_options(source, testcase, incremental_step=1)
options.incremental = True
options.use_builtins_fixtures = True
options.show_traceback = True
options.fine_grained_incremental = not build_cache
options.use_fine_grained_cache = enable_cache and not build_cache
options.cache_fine_grained = enable_cache

main_path = os.path.join(test_temp_dir, 'main')
with open(main_path, 'w') as f:
f.write(source)
12 changes: 12 additions & 0 deletions mypy/test/testfinegrainedcache.py
@@ -0,0 +1,12 @@
"""Tests for fine-grained incremental checking using the cache.

All of the real code for this lives in testfinegrained.py.
"""

# We can't "import FineGrainedSuite from ..." because that will cause pytest
# to collect the non-caching tests when running this file.
import mypy.test.testfinegrained


class FineGrainedCacheSuite(mypy.test.testfinegrained.FineGrainedSuite):
use_cache = True
1 change: 1 addition & 0 deletions mypy/test/testmerge.py
@@ -99,6 +99,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
def build(self, source: str) -> Tuple[List[str], Optional[BuildManager], Dict[str, State]]:
options = Options()
options.incremental = True
options.fine_grained_incremental = True
options.use_builtins_fixtures = True
options.show_traceback = True
main_path = os.path.join(test_temp_dir, 'main')