Skip to content

Overhaul file-system operations to use FileSystemCache #4623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Mar 14, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
285 changes: 103 additions & 182 deletions mypy/build.py

Large diffs are not rendered by default.

29 changes: 11 additions & 18 deletions mypy/dmypy_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,12 +283,16 @@ def check_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str,
return self.fine_grained_increment(sources)

def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:
self.fscache = FileSystemCache(self.options.python_version)
self.fswatcher = FileSystemWatcher(self.fscache)
# The file system cache we create gets passed off to
# BuildManager, and thence to FineGrainedBuildManager, which
# assumes responsibility for clearing it after updates.
fscache = FileSystemCache(self.options.python_version)
self.fswatcher = FileSystemWatcher(fscache)
self.update_sources(sources)
try:
result = mypy.build.build(sources=sources,
options=self.options,
fscache=fscache,
alt_lib_path=self.alt_lib_path)
except mypy.errors.CompileError as e:
output = ''.join(s + '\n' for s in e.messages)
Expand All @@ -298,9 +302,7 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
out, err = '', output
return {'out': out, 'err': err, 'status': 2}
messages = result.errors
manager = result.manager
graph = result.graph
self.fine_grained_manager = FineGrainedBuildManager(manager, graph)
self.fine_grained_manager = FineGrainedBuildManager(result)
self.previous_sources = sources

# If we are using the fine-grained cache, build hasn't actually done
Expand All @@ -317,7 +319,6 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
state.path,
FileData(st_mtime=float(meta.mtime), st_size=meta.size, md5=meta.hash))

# Run an update
changed, removed = self.find_changed(sources)

# Find anything that has had its dependency list change
Expand All @@ -326,16 +327,14 @@ def initialize_fine_grained(self, sources: List[mypy.build.BuildSource]) -> Dict
assert state.path is not None
changed.append((state.id, state.path))

if changed or removed:
messages = self.fine_grained_manager.update(changed, removed)
# Run an update
messages = self.fine_grained_manager.update(changed, removed)
else:
# Stores the initial state of sources as a side effect.
self.fswatcher.find_changed()

self.fscache.flush()

fscache.flush()
status = 1 if messages else 0
self.previous_messages = messages[:]
return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status}

def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[str, Any]:
Expand All @@ -345,19 +344,13 @@ def fine_grained_increment(self, sources: List[mypy.build.BuildSource]) -> Dict[
self.update_sources(sources)
changed, removed = self.find_changed(sources)
t1 = time.time()
if not (changed or removed):
# Nothing changed -- just produce the same result as before.
messages = self.previous_messages
else:
messages = self.fine_grained_manager.update(changed, removed)
messages = self.fine_grained_manager.update(changed, removed)
t2 = time.time()
self.fine_grained_manager.manager.log(
"fine-grained increment: find_changed: {:.3f}s, update: {:.3f}s".format(
t1 - t0, t2 - t1))
status = 1 if messages else 0
self.previous_messages = messages[:]
self.previous_sources = sources
self.fscache.flush()
return {'out': ''.join(s + '\n' for s in messages), 'err': '', 'status': status}

def update_sources(self, sources: List[mypy.build.BuildSource]) -> None:
Expand Down
7 changes: 0 additions & 7 deletions mypy/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,13 +616,6 @@ def __init__(self,
self.module_with_blocker = module_with_blocker


class DecodeError(Exception):
"""Exception raised when a file cannot be decoded due to an unknown encoding type.

Essentially a wrapper for the LookupError raised by `bytearray.decode`
"""


def remove_path_prefix(path: str, prefix: str) -> str:
"""If path starts with prefix, return copy of path with the prefix removed.
Otherwise, return path. If path is None, return None.
Expand Down
99 changes: 68 additions & 31 deletions mypy/fscache.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,45 +30,21 @@

import os
import stat
from typing import Tuple, Dict, List
from typing import Tuple, Dict, List, Optional
from mypy.util import read_with_python_encoding

from mypy.build import read_with_python_encoding
from mypy.errors import DecodeError


class FileSystemCache:
def __init__(self, pyversion: Tuple[int, int]) -> None:
self.pyversion = pyversion
class FileSystemMetaCache:
def __init__(self) -> None:
self.flush()

def flush(self) -> None:
"""Start another transaction and empty all caches."""
self.stat_cache = {} # type: Dict[str, os.stat_result]
self.stat_error_cache = {} # type: Dict[str, Exception]
self.read_cache = {} # type: Dict[str, str]
self.read_error_cache = {} # type: Dict[str, Exception]
self.hash_cache = {} # type: Dict[str, str]
self.listdir_cache = {} # type: Dict[str, List[str]]
self.listdir_error_cache = {} # type: Dict[str, Exception]

def read_with_python_encoding(self, path: str) -> str:
if path in self.read_cache:
return self.read_cache[path]
if path in self.read_error_cache:
raise self.read_error_cache[path]

# Need to stat first so that the contents of file are from no
# earlier instant than the mtime reported by self.stat().
self.stat(path)

try:
data, md5hash = read_with_python_encoding(path, self.pyversion)
except Exception as err:
self.read_error_cache[path] = err
raise
self.read_cache[path] = data
self.hash_cache[path] = md5hash
return data
self.isfile_case_cache = {} # type: Dict[str, bool]

def stat(self, path: str) -> os.stat_result:
if path in self.stat_cache:
Expand Down Expand Up @@ -97,11 +73,40 @@ def listdir(self, path: str) -> List[str]:
return results

def isfile(self, path: str) -> bool:
st = self.stat(path)
try:
st = self.stat(path)
except OSError:
return False
return stat.S_ISREG(st.st_mode)

def isfile_case(self, path: str) -> bool:
"""Return whether path exists and is a file.

On case-insensitive filesystems (like Mac or Windows) this returns
False if the case of the path's last component does not exactly
match the case found in the filesystem.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, this means that importing wrongly-cased packages will not be flagged, since we only compare the filename (which would be __init__.py for a package). Not actionable except perhaps add a TODO comment; this was a pre-existing problem. (Python itself gets it right so that's a fallback.)

TODO: We should maybe check the case for some directory components also,
to avoid permitting wrongly-cased *packages*.
"""
if path in self.isfile_case_cache:
return self.isfile_case_cache[path]
head, tail = os.path.split(path)
if not tail:
res = False
else:
try:
names = self.listdir(head)
res = tail in names and self.isfile(path)
except OSError:
res = False
self.isfile_case_cache[path] = res
return res

def isdir(self, path: str) -> bool:
st = self.stat(path)
try:
st = self.stat(path)
except OSError:
return False
return stat.S_ISDIR(st.st_mode)

def exists(self, path: str) -> bool:
Expand All @@ -111,6 +116,38 @@ def exists(self, path: str) -> bool:
return False
return True


class FileSystemCache(FileSystemMetaCache):
def __init__(self, pyversion: Tuple[int, int]) -> None:
self.pyversion = pyversion
self.flush()

def flush(self) -> None:
"""Start another transaction and empty all caches."""
super().flush()
self.read_cache = {} # type: Dict[str, str]
self.read_error_cache = {} # type: Dict[str, Exception]
self.hash_cache = {} # type: Dict[str, str]

def read_with_python_encoding(self, path: str) -> str:
if path in self.read_cache:
return self.read_cache[path]
if path in self.read_error_cache:
raise self.read_error_cache[path]

# Need to stat first so that the contents of file are from no
# earlier instant than the mtime reported by self.stat().
self.stat(path)

try:
data, md5hash = read_with_python_encoding(path, self.pyversion)
except Exception as err:
self.read_error_cache[path] = err
raise
self.read_cache[path] = data
self.hash_cache[path] = md5hash
return data

def md5(self, path: str) -> str:
if path not in self.hash_cache:
self.read_with_python_encoding(path)
Expand Down
4 changes: 3 additions & 1 deletion mypy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,8 @@ def add_invertible_flag(flag: str,
.format(special_opts.package))
options.build_type = BuildType.MODULE
lib_path = [os.getcwd()] + build.mypy_path()
targets = build.find_modules_recursive(special_opts.package, lib_path)
# TODO: use the same cache as the BuildManager will
targets = build.FindModuleCache().find_modules_recursive(special_opts.package, lib_path)
if not targets:
fail("Can't find package '{}'".format(special_opts.package))
return targets, options
Expand All @@ -548,6 +549,7 @@ def add_invertible_flag(flag: str,
return targets, options


# TODO: use a FileSystemCache for this
def create_source_list(files: Sequence[str], options: Options) -> List[BuildSource]:
targets = []
for f in files:
Expand Down
42 changes: 24 additions & 18 deletions mypy/server/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,17 +114,17 @@
Major todo items:

- Fully support multiple type checking passes
- Use mypy.fscache to access file system
"""

import os
import time
import os.path
from typing import (
Dict, List, Set, Tuple, Iterable, Union, Optional, Mapping, NamedTuple, Callable
)

from mypy.build import (
BuildManager, State, BuildSource, Graph, load_graph, find_module_clear_caches,
BuildManager, State, BuildSource, BuildResult, Graph, load_graph,
PRI_INDIRECT, DEBUG_FINE_GRAINED,
)
from mypy.checker import DeferredNode
Expand All @@ -135,6 +135,7 @@
)
from mypy.options import Options
from mypy.types import Type
from mypy.fscache import FileSystemCache
from mypy.semanal import apply_semantic_analyzer_patches
from mypy.server.astdiff import (
snapshot_symbol_table, compare_symbol_table_snapshots, SnapshotItem
Expand All @@ -150,21 +151,23 @@


class FineGrainedBuildManager:
def __init__(self,
manager: BuildManager,
graph: Graph) -> None:
def __init__(self, result: BuildResult) -> None:
"""Initialize fine-grained build based on a batch build.

Args:
result: Result from the initialized build.
The manager and graph will be taken over by this class.
manager: State of the build (mutated by this class)
graph: Additional state of the build (only read to initialize state)
"""
manager = result.manager
self.manager = manager
self.graph = result.graph
self.options = manager.options
self.previous_modules = get_module_to_path_map(manager)
self.deps = get_all_dependencies(manager, graph, self.options)
self.deps = get_all_dependencies(manager, self.graph, self.options)
self.previous_targets_with_errors = manager.errors.targets()
self.graph = graph
self.previous_messages = result.errors[:]
# Module, if any, that had blocking errors in the last run as (id, path) tuple.
# TODO: Handle blocking errors in the initial build
self.blocking_error = None # type: Optional[Tuple[str, str]]
Expand Down Expand Up @@ -205,13 +208,15 @@ def update(self,
A list of errors.
"""
changed_modules = changed_modules + removed_modules
assert changed_modules or removed_modules, 'No changed modules'

removed_set = {module for module, _ in removed_modules}
self.changed_modules = changed_modules

# Reset global caches for the new build.
find_module_clear_caches()
if not changed_modules:
self.manager.fscache.flush()
return self.previous_messages

# Reset find_module's caches for the new build.
self.manager.find_module_cache.clear()

self.triggered = []
self.updated_modules = []
Expand Down Expand Up @@ -249,8 +254,10 @@ def update(self,
if blocker:
self.blocking_error = (next_id, next_path)
self.stale = changed_modules
return messages
break

self.manager.fscache.flush()
self.previous_messages = messages[:]
return messages

def update_single(self,
Expand Down Expand Up @@ -383,7 +390,7 @@ def update_single_isolated(module: str,
manager.log_fine_grained('new module %r' % module)

old_modules = dict(manager.modules)
sources = get_sources(previous_modules, [(module, path)])
sources = get_sources(manager.fscache, previous_modules, [(module, path)])

if module in manager.missing_modules:
manager.missing_modules.remove(module)
Expand All @@ -407,7 +414,7 @@ def update_single_isolated(module: str,
remaining_modules = []
return BlockedUpdate(err.module_with_blocker, path, remaining_modules, err.messages)

if not os.path.isfile(path) or force_removed:
if not manager.fscache.isfile(path) or force_removed:
delete_module(module, graph, manager)
return NormalUpdate(module, path, [], None)

Expand Down Expand Up @@ -537,13 +544,12 @@ def get_module_to_path_map(manager: BuildManager) -> Dict[str, str]:
for module, node in manager.modules.items()}


def get_sources(modules: Dict[str, str],
def get_sources(fscache: FileSystemCache,
modules: Dict[str, str],
changed_modules: List[Tuple[str, str]]) -> List[BuildSource]:
# TODO: Race condition when reading from the file system; we should only read each
# bit of external state once during a build to have a consistent view of the world
sources = []
for id, path in changed_modules:
if os.path.isfile(path):
if fscache.isfile(path):
sources.append(BuildSource(path, id, None))
return sources

Expand Down
4 changes: 2 additions & 2 deletions mypy/stubgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def find_module_path_and_all(module: str, pyversion: Tuple[int, int],
module_all = getattr(mod, '__all__', None)
else:
# Find module by going through search path.
module_path = mypy.build.find_module(module, ['.'] + search_path)
module_path = mypy.build.FindModuleCache().find_module(module, ['.'] + search_path)
if not module_path:
raise SystemExit(
"Can't find module '{}' (consider using --search-path)".format(module))
Expand Down Expand Up @@ -201,7 +201,7 @@ def generate_stub(path: str,
include_private: bool = False
) -> None:

source, _ = mypy.build.read_with_python_encoding(path, pyversion)
source, _ = mypy.util.read_with_python_encoding(path, pyversion)
options = MypyOptions()
options.python_version = pyversion
try:
Expand Down
Loading