Caching for fine-grained incremental mode #4483

Merged: 8 commits, Feb 7, 2018
21 changes: 20 additions & 1 deletion mypy/build.py
@@ -1131,6 +1131,17 @@ def validate_meta(meta: Optional[CacheMeta], id: str, path: Optional[str],
if not stat.S_ISREG(st.st_mode):
manager.log('Metadata abandoned for {}: file {} does not exist'.format(id, path))
return None

# When we are using a fine-grained cache, we want our initial
# build() to load all of the cache information and then do a
# fine-grained incremental update to catch anything that has
# changed since the cache was generated. We *don't* want to do a
# coarse-grained incremental rebuild, so we accept the cache
# metadata even if it doesn't match the source file.
if manager.options.use_fine_grained_cache:
manager.log('Using potentially stale metadata for {}'.format(id))
return meta

size = st.st_size
if size != meta.size:
manager.log('Metadata abandoned for {}: file {} has different size'.format(id, path))
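To make the control flow concrete, here is a minimal standalone sketch of the short-circuit this hunk adds (a hypothetical function with simplified checks, not the real `validate_meta`):

```python
# A simplified model of the check above: in fine-grained-cache mode we
# accept possibly-stale metadata wholesale and let the daemon's first
# fine-grained update reconcile whatever changed on disk afterwards.
def validate_meta_sketch(meta, use_fine_grained_cache, st_size, st_mtime):
    if meta is None:
        return None
    if use_fine_grained_cache:
        return meta  # trust the cache; staleness is handled later
    if st_size != meta.size or st_mtime != meta.mtime:
        return None  # coarse-grained mode: a mismatch invalidates the entry
    return meta
```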
@@ -2383,6 +2394,14 @@ def process_graph(graph: Graph, manager: BuildManager) -> None:
manager.log("Processing SCC of size %d (%s) as %s" % (size, scc_str, fresh_msg))
process_stale_scc(graph, scc, manager)

# If we are running in fine-grained incremental mode with caching,
# we always process fresh SCCs so that we have all of the symbol
# tables and fine-grained dependencies available.
if manager.options.use_fine_grained_cache:
for prev_scc in fresh_scc_queue:
process_fresh_scc(graph, prev_scc, manager)
fresh_scc_queue = []

sccs_left = len(fresh_scc_queue)
nodes_left = sum(len(scc) for scc in fresh_scc_queue)
manager.add_stats(sccs_left=sccs_left, nodes_left=nodes_left)
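In other words, when the fine-grained cache is in use, the queue of deferred fresh SCCs is drained eagerly, so the `sccs_left`/`nodes_left` stats recorded just above drop to zero in that mode.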
@@ -2569,7 +2588,7 @@ def process_stale_scc(graph: Graph, scc: List[str], manager: BuildManager) -> None:
graph[id].transitive_error = True
for id in stale:
graph[id].finish_passes()
if manager.options.cache_fine_grained:
if manager.options.cache_fine_grained or manager.options.fine_grained_incremental:
graph[id].compute_fine_grained_deps()
graph[id].generate_unused_ignore_notes()
manager.flush_errors(manager.errors.file_messages(graph[id].xpath), False)
31 changes: 27 additions & 4 deletions mypy/dmypy_server.py
@@ -24,7 +24,7 @@
from mypy.dmypy_util import STATUS_FILE, receive
from mypy.gclogger import GcLogger
from mypy.fscache import FileSystemCache
from mypy.fswatcher import FileSystemWatcher
from mypy.fswatcher import FileSystemWatcher, FileData


def daemonize(func: Callable[[], None], log_file: Optional[str] = None) -> int:
@@ -99,13 +99,18 @@ def __init__(self, flags: List[str]) -> None:
sys.exit("dmypy: start/restart should not disable incremental mode")
if options.quick_and_dirty:
sys.exit("dmypy: start/restart should not specify quick_and_dirty mode")
if options.use_fine_grained_cache and not options.fine_grained_incremental:
sys.exit("dmypy: fine-grained cache can only be used in experimental mode")
self.options = options
if os.path.isfile(STATUS_FILE):
os.unlink(STATUS_FILE)
if self.fine_grained:
options.incremental = True
options.show_traceback = True
options.cache_dir = os.devnull
if options.use_fine_grained_cache:
options.cache_fine_grained = True # set this so that cache options match
else:
options.cache_dir = os.devnull

def serve(self) -> None:
"""Serve requests, synchronously (no thread or fork)."""
@@ -263,11 +268,29 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:
manager = result.manager
graph = result.graph
self.fine_grained_manager = mypy.server.update.FineGrainedBuildManager(manager, graph)
status = 1 if messages else 0
self.previous_messages = messages[:]
self.fine_grained_initialized = True
self.previous_sources = sources
self.fscache.flush()

# If we are using the fine-grained cache, build hasn't actually done
# the typechecking on the updated files yet, so run a fine-grained
# update starting from the cached data.
if self.options.use_fine_grained_cache:
# Pull times and hashes out of the saved_cache and stick them into
# the fswatcher, so we pick up the changes.
for meta, mypyfile, type_map in manager.saved_cache.values():
if meta.mtime is None: continue
self.fswatcher.set_file_data(
meta.path,
FileData(st_mtime=float(meta.mtime), st_size=meta.size, md5=meta.hash))

# Run an update
changed = self.find_changed(sources)
if changed:
messages = self.fine_grained_manager.update(changed)

status = 1 if messages else 0
self.previous_messages = messages[:]
return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status}

def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:
3 changes: 3 additions & 0 deletions mypy/fswatcher.py
@@ -36,6 +36,9 @@ def __init__(self, fs: FileSystemCache) -> None:
def paths(self) -> AbstractSet[str]:
return self._paths

def set_file_data(self, path: str, data: FileData) -> None:
self._file_data[path] = data

def add_watched_paths(self, paths: Iterable[str]) -> None:
for path in paths:
if path not in self._paths:
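The new hook exists so the daemon can seed the watcher with *cached* stats rather than fresh ones. Here is a toy illustration of why seeding matters (hypothetical `Watcher`/`Stamp`, not mypy's classes): without it, every file would look new to the first `find_changed()` and force a full re-check.

```python
from typing import Dict, NamedTuple, Set

class Stamp(NamedTuple):
    mtime: float
    size: int

class Watcher:
    """Reports paths whose current stamp differs from the last one seen."""

    def __init__(self) -> None:
        self._seen = {}  # type: Dict[str, Stamp]

    def seed(self, path: str, stamp: Stamp) -> None:
        # Pretend we already observed this state (mirrors set_file_data).
        self._seen[path] = stamp

    def find_changed(self, current: Dict[str, Stamp]) -> Set[str]:
        changed = {p for p, s in current.items() if self._seen.get(p) != s}
        self._seen.update(current)
        return changed

w = Watcher()
w.seed('mod.py', Stamp(100.0, 10))
assert w.find_changed({'mod.py': Stamp(100.0, 10)}) == set()       # unchanged
assert w.find_changed({'mod.py': Stamp(101.0, 12)}) == {'mod.py'}  # edited
```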
2 changes: 2 additions & 0 deletions mypy/main.py
@@ -395,6 +395,8 @@ def add_invertible_flag(flag: str,
if server_options:
parser.add_argument('--experimental', action='store_true', dest='fine_grained_incremental',
help="enable fine-grained incremental mode")
parser.add_argument('--use-fine-grained-cache', action='store_true',
help="use the cache in fine-grained incremental mode")

report_group = parser.add_argument_group(
title='report generation',
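For context, the new flag is meant to pair with `--experimental`: a plausible daemon invocation (hedged; the exact pass-through of server flags may differ across dmypy versions) is `dmypy start -- --experimental --use-fine-grained-cache`, run after an earlier build has written a cache that includes fine-grained dependencies.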
3 changes: 3 additions & 0 deletions mypy/nodes.py
@@ -203,6 +203,8 @@ class MypyFile(SymbolNode):
ignored_lines = None # type: Set[int]
# Is this file represented by a stub file (.pyi)?
is_stub = False
# Is this loaded from the cache and thus missing the actual body of the file?
is_cache_skeleton = False

def __init__(self,
defs: List[Statement],
@@ -249,6 +251,7 @@ def deserialize(cls, data: JsonDict) -> 'MypyFile':
tree.names = SymbolTable.deserialize(data['names'])
tree.is_stub = data['is_stub']
tree.path = data['path']
tree.is_cache_skeleton = True
return tree
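A note on how the flag is consumed: code that needs to walk a module's body is expected to check `is_cache_skeleton` first. The `propagate_changes_using_dependencies` hunk in mypy/server/update.py below does exactly that, queueing skeleton modules for a full reprocess rather than touching their absent bodies.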


1 change: 1 addition & 0 deletions mypy/options.py
@@ -144,6 +144,7 @@ def __init__(self) -> None:
self.skip_version_check = False
self.fine_grained_incremental = False
self.cache_fine_grained = False
self.use_fine_grained_cache = False

# Paths of user plugins
self.plugins = [] # type: List[str]
41 changes: 32 additions & 9 deletions mypy/server/update.py
@@ -170,6 +170,10 @@ def __init__(self,
self.blocking_error = None # type: Optional[Tuple[str, str]]
# Modules that we haven't processed yet but that are known to be stale.
self.stale = [] # type: List[Tuple[str, str]]
# Disable the cache so that load_graph doesn't try going back to disk
# for the cache. This is kind of a hack and it might be better to have
# this directly reflected in load_graph's interface.
self.options.cache_dir = os.devnull
mark_all_meta_as_memory_only(graph, manager)
manager.saved_cache = preserve_full_cache(graph, manager)
self.type_maps = extract_type_maps(graph)
@@ -281,9 +285,10 @@ def update_single(self, module: str, path: str) -> Tuple[List[str],
print('triggered:', sorted(filtered))
self.triggered.extend(triggered | self.previous_targets_with_errors)
collect_dependencies({module: tree}, self.deps, graph)
propagate_changes_using_dependencies(manager, graph, self.deps, triggered,
{module},
self.previous_targets_with_errors)
remaining += propagate_changes_using_dependencies(
manager, graph, self.deps, triggered,
{module},
self.previous_targets_with_errors)

# Preserve state needed for the next update.
self.previous_targets_with_errors = manager.errors.targets()
@@ -318,6 +323,7 @@ def mark_all_meta_as_memory_only(graph: Dict[str, State],
def get_all_dependencies(manager: BuildManager, graph: Dict[str, State],
options: Options) -> Dict[str, Set[str]]:
"""Return the fine-grained dependency map for an entire build."""
# Deps for each module were computed during build() or loaded from the cache.
deps = {} # type: Dict[str, Set[str]]
collect_dependencies(manager.modules, deps, graph)
return deps
@@ -374,7 +380,7 @@ def update_single_isolated(module: str,
sources = get_sources(previous_modules, [(module, path)])
invalidate_stale_cache_entries(manager.saved_cache, [(module, path)])

manager.missing_modules = set()
manager.missing_modules.clear()
try:
graph = load_graph(sources, manager)
except CompileError as err:
@@ -441,6 +447,7 @@ def update_single_isolated(module: str,
# Perform type checking.
state.type_check_first_pass()
state.type_check_second_pass()
state.compute_fine_grained_deps()
state.finish_passes()
# TODO: state.write_cache()?
# TODO: state.mark_as_rechecked()?
@@ -492,7 +499,8 @@ def delete_module(module_id: str,
# TODO: Remove deps for the module (this only affects memory use, not correctness)
assert module_id not in graph
new_graph = graph.copy()
del manager.modules[module_id]
if module_id in manager.modules:
del manager.modules[module_id]
if module_id in manager.saved_cache:
del manager.saved_cache[module_id]
components = module_id.split('.')
@@ -654,7 +662,6 @@ def collect_dependencies(new_modules: Mapping[str, Optional[MypyFile]],
for id, node in new_modules.items():
if node is None:
continue
graph[id].compute_fine_grained_deps()
for trigger, targets in graph[id].fine_grained_deps.items():
deps.setdefault(trigger, set()).update(targets)

@@ -711,9 +718,15 @@ def propagate_changes_using_dependencies(
deps: Dict[str, Set[str]],
triggered: Set[str],
up_to_date_modules: Set[str],
targets_with_errors: Set[str]) -> None:
targets_with_errors: Set[str]) -> List[Tuple[str, str]]:
"""Transitively rechecks targets based on triggers and the dependency map.

Returns a list of (module id, path) tuples representing modules that contain
a target that needs to be reprocessed but that have not been parsed yet."""

# TODO: Multiple type checking passes
num_iter = 0
remaining_modules = []

# Propagate changes until nothing visible has changed during the last
# iteration.
@@ -737,7 +750,13 @@
# TODO: Preserve order (set is not optimal)
for id, nodes in sorted(todo.items(), key=lambda x: x[0]):
assert id not in up_to_date_modules
triggered |= reprocess_nodes(manager, graph, id, nodes, deps)
if manager.modules[id].is_cache_skeleton:
# We have only loaded the cache for this file, not the actual file,
# so we can't access the nodes to reprocess.
# Add it to the queue of files that need to be processed fully.
remaining_modules.append((id, manager.modules[id].path))
else:
triggered |= reprocess_nodes(manager, graph, id, nodes, deps)
# Changes elsewhere may require us to reprocess modules that were
# previously considered up to date. For example, there may be a
# dependency loop that loops back to an originally processed module.
@@ -746,6 +765,8 @@
if DEBUG:
print('triggered:', list(triggered))

return remaining_modules


def find_targets_recursive(
triggers: Set[str],
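The callers (see `update_single` above, which does `remaining += propagate_changes_using_dependencies(...)`) accumulate this return value into a work queue. A minimal sketch of that retry pattern, with a hypothetical `update_module` callable standing in for the manager's per-module update:

```python
# Sketch: cache-skeleton modules can't be reprocessed in place, so they
# are handed back to the caller and re-run as full updates, which parse
# the file and replace the skeleton with a real tree.
def drain_remaining(update_module, initial):
    remaining = list(initial)  # list of (module_id, path) pairs
    while remaining:
        module_id, path = remaining.pop()
        # A full update may itself surface more skeleton-only modules.
        remaining.extend(update_module(module_id, path))
```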
Expand Down Expand Up @@ -993,4 +1014,6 @@ def lookup_target(modules: Dict[str, MypyFile], target: str) -> List[DeferredNod


def extract_type_maps(graph: Graph) -> Dict[str, Dict[Expression, Type]]:
return {id: state.type_map() for id, state in graph.items()}
# This is only used to export information for the testmerge harness.
return {id: state.type_map() for id, state in graph.items()
if state.tree}
1 change: 1 addition & 0 deletions mypy/test/testdmypy.py
@@ -120,6 +120,7 @@ def run_case_once(self, testcase: DataDrivenTestCase, incremental_step: int) -> None:
server_options = [] # type: List[str]
if 'fine-grained' in testcase.file:
server_options.append('--experimental')
options.fine_grained_incremental = True
self.server = dmypy_server.Server(server_options) # TODO: Fix ugly API
self.server.options = options

56 changes: 48 additions & 8 deletions mypy/test/testfinegrained.py
@@ -30,6 +30,7 @@
from mypy.test.testtypegen import ignore_node
from mypy.types import TypeStrVisitor, Type
from mypy.util import short_type
import pytest # type: ignore # no pytest in typeshed


class FineGrainedSuite(DataSuite):
@@ -41,17 +42,44 @@ class FineGrainedSuite(DataSuite):
]
base_path = test_temp_dir
optional_out = True
# Whether to use the fine-grained cache during testing. This is overridden
# by a trivial subclass to produce a suite that uses the cache.
use_cache = False

# Decide whether to skip the test. This could have been structured
# as a filter() classmethod also, but we want the tests reported
# as skipped, not just elided.
def should_skip(self, testcase: DataDrivenTestCase) -> bool:
if self.use_cache:
if testcase.name.endswith("-skip-cache"):
return True
# TODO: In caching mode we currently don't properly support
# starting from cached states with errors in them.
if testcase.output and testcase.output[0] != '==':
return True
else:
if testcase.name.endswith("-skip-nocache"):
return True

return False

def run_case(self, testcase: DataDrivenTestCase) -> None:
if self.should_skip(testcase):
pytest.skip()
return

main_src = '\n'.join(testcase.input)
sources_override = self.parse_sources(main_src)
messages, manager, graph = self.build(main_src, testcase, sources_override)

messages, manager, graph = self.build(main_src, testcase, sources_override,
build_cache=self.use_cache,
enable_cache=self.use_cache)
a = []
if messages:
a.extend(normalize_messages(messages))

fine_grained_manager = FineGrainedBuildManager(manager, graph)
fine_grained_manager = None
if not self.use_cache:
fine_grained_manager = FineGrainedBuildManager(manager, graph)

steps = testcase.find_steps()
all_triggered = []
@@ -70,6 +98,14 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
modules = [(module, path)
for module, path in sources_override
if any(m == module for m, _ in modules)]

# If this is the second iteration and we are using a
# cache, we need to set it up now.
if fine_grained_manager is None:
messages, manager, graph = self.build(main_src, testcase, sources_override,
build_cache=False, enable_cache=True)
fine_grained_manager = FineGrainedBuildManager(manager, graph)

new_messages = fine_grained_manager.update(modules)
all_triggered.append(fine_grained_manager.triggered)
new_messages = normalize_messages(new_messages)
@@ -82,8 +118,8 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:

assert_string_arrays_equal(
testcase.output, a,
'Invalid output ({}, line {})'.format(testcase.file,
testcase.line))
'Invalid output ({}, line {})'.format(
testcase.file, testcase.line))

if testcase.triggered:
assert_string_arrays_equal(
@@ -95,14 +131,18 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
def build(self,
source: str,
testcase: DataDrivenTestCase,
sources_override: Optional[List[Tuple[str, str]]]) -> Tuple[List[str],
BuildManager,
Graph]:
sources_override: Optional[List[Tuple[str, str]]],
build_cache: bool,
enable_cache: bool) -> Tuple[List[str], BuildManager, Graph]:
# This handles things like '# flags: --foo'.
options = parse_options(source, testcase, incremental_step=1)
options.incremental = True
options.use_builtins_fixtures = True
options.show_traceback = True
options.fine_grained_incremental = not build_cache
options.use_fine_grained_cache = enable_cache and not build_cache
options.cache_fine_grained = enable_cache

main_path = os.path.join(test_temp_dir, 'main')
with open(main_path, 'w') as f:
f.write(source)
12 changes: 12 additions & 0 deletions mypy/test/testfinegrainedcache.py
@@ -0,0 +1,12 @@
"""Tests for fine-grained incremental checking using the cache.

All of the real code for this lives in testfinegrained.py.
"""

# We can't "import FineGrainedSuite from ..." because that will cause pytest
# to collect the non-caching tests when running this file.
import mypy.test.testfinegrained


class FineGrainedCacheSuite(mypy.test.testfinegrained.FineGrainedSuite):
use_cache = True
1 change: 1 addition & 0 deletions mypy/test/testmerge.py
@@ -99,6 +99,7 @@ def run_case(self, testcase: DataDrivenTestCase) -> None:
def build(self, source: str) -> Tuple[List[str], Optional[BuildManager], Dict[str, State]]:
options = Options()
options.incremental = True
options.fine_grained_incremental = True
options.use_builtins_fixtures = True
options.show_traceback = True
main_path = os.path.join(test_temp_dir, 'main')