Skip to content

Commit

Permalink
introduce per-file timing-stats (#12639)
Browse files Browse the repository at this point in the history
When profiling mypy over a large codebase, it can be useful
to know which files are slowest to typecheck.

Gather per-file timing stats and expose them through a new
(hidden) command line switch
  • Loading branch information
hugues-aff authored Apr 29, 2022
1 parent d64efcd commit d48d548
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 8 deletions.
32 changes: 31 additions & 1 deletion mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
from mypy.errors import Errors, CompileError, ErrorInfo, report_internal_error
from mypy.util import (
DecodeError, decode_python_encoding, is_sub_path, get_mypy_comments, module_prefix,
read_py_file, hash_digest, is_typeshed_file, is_stub_package_file, get_top_two_prefixes
read_py_file, hash_digest, is_typeshed_file, is_stub_package_file, get_top_two_prefixes,
time_ref, time_spent_us
)
if TYPE_CHECKING:
from mypy.report import Reports # Avoid unconditional slow import
Expand Down Expand Up @@ -256,6 +257,8 @@ def _build(sources: List[BuildSource],
graph = dispatch(sources, manager, stdout)
if not options.fine_grained_incremental:
TypeState.reset_all_subtype_caches()
if options.timing_stats is not None:
dump_timing_stats(options.timing_stats, graph)
return BuildResult(manager, graph)
finally:
t0 = time.time()
Expand Down Expand Up @@ -1808,6 +1811,9 @@ class State:

fine_grained_deps_loaded = False

# Cumulative time spent on this file, in microseconds (for profiling stats)
time_spent_us: int = 0

def __init__(self,
id: Optional[str],
path: Optional[str],
Expand Down Expand Up @@ -2034,6 +2040,8 @@ def parse_file(self) -> None:
else:
manager.log("Using cached AST for %s (%s)" % (self.xpath, self.id))

t0 = time_ref()

with self.wrap_context():
source = self.source
self.source = None # We won't need it again.
Expand Down Expand Up @@ -2079,6 +2087,8 @@ def parse_file(self) -> None:
self.tree.ignored_lines,
self.ignore_all or self.options.ignore_errors)

self.time_spent_us += time_spent_us(t0)

if not cached:
# Make a copy of any errors produced during parse time so that
# fine-grained mode can repeat them when the module is
Expand Down Expand Up @@ -2113,6 +2123,9 @@ def semantic_analysis_pass1(self) -> None:
"""
options = self.options
assert self.tree is not None

t0 = time_ref()

# Do the first pass of semantic analysis: analyze the reachability
# of blocks and import statements. We must do this before
# processing imports, since this may mark some import statements as
Expand All @@ -2131,6 +2144,7 @@ def semantic_analysis_pass1(self) -> None:
if options.allow_redefinition:
# Perform more renaming across the AST to allow variable redefinitions
self.tree.accept(VariableRenameVisitor())
self.time_spent_us += time_spent_us(t0)

def add_dependency(self, dep: str) -> None:
if dep not in self.dependencies_set:
Expand Down Expand Up @@ -2188,8 +2202,10 @@ def compute_dependencies(self) -> None:
def type_check_first_pass(self) -> None:
    """Run the first type-checking pass over this file, accumulating timing stats."""
    # Nothing to do when the build stops after semantic analysis.
    if self.options.semantic_analysis_only:
        return
    start = time_ref()
    with self.wrap_context():
        self.type_checker().check_first_pass()
    self.time_spent_us += time_spent_us(start)

def type_checker(self) -> TypeChecker:
if not self._type_checker:
Expand All @@ -2207,14 +2223,17 @@ def type_map(self) -> Dict[Expression, Type]:
def type_check_second_pass(self) -> bool:
    """Run the second type-checking pass; return True if another pass is needed.

    Returns False immediately (without checking) when only semantic
    analysis was requested.
    """
    if self.options.semantic_analysis_only:
        return False
    t0 = time_ref()
    # Capture the result in a local instead of returning from inside the
    # `with` block: the original `return` skipped the timing update below,
    # so second-pass time was never added to time_spent_us.  It also means
    # we always return a bool even if wrap_context() suppresses an error
    # raised by check_second_pass().
    result = False
    with self.wrap_context():
        result = self.type_checker().check_second_pass()
    self.time_spent_us += time_spent_us(t0)
    return result

def finish_passes(self) -> None:
assert self.tree is not None, "Internal error: method must be called on parsed file only"
manager = self.manager
if self.options.semantic_analysis_only:
return
t0 = time_ref()
with self.wrap_context():
# Some tests (and tools) want to look at the set of all types.
options = manager.options
Expand All @@ -2237,6 +2256,7 @@ def finish_passes(self) -> None:
self.free_state()
if not manager.options.fine_grained_incremental and not manager.options.preserve_asts:
free_tree(self.tree)
self.time_spent_us += time_spent_us(t0)

def free_state(self) -> None:
if self._type_checker:
Expand Down Expand Up @@ -2771,6 +2791,16 @@ def dumps(self) -> str:
json.dumps(self.deps))


def dump_timing_stats(path: str, graph: 'Graph') -> None:
    """Write per-module cumulative timing stats to *path*.

    One line per module, sorted by graph key, in the form
    "<module id> <microseconds>".
    """
    with open(path, 'w') as timing_file:
        for module_key in sorted(graph):
            state = graph[module_key]
            timing_file.write(f'{state.id} {state.time_spent_us}\n')


def dump_graph(graph: Graph, stdout: Optional[TextIO] = None) -> None:
"""Dump the graph as a JSON string to stdout.
Expand Down
3 changes: 3 additions & 0 deletions mypy/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,9 @@ def add_invertible_flag(flag: str,
parser.add_argument(
'--dump-build-stats', action='store_true',
help=argparse.SUPPRESS)
# dump timing stats for each processed file into the given output file
parser.add_argument(
'--timing-stats', dest='timing_stats', help=argparse.SUPPRESS)
# --debug-cache will disable any cache-related compressions/optimizations,
# which will make the cache writing process output pretty-printed JSON (which
# is easier to debug).
Expand Down
1 change: 1 addition & 0 deletions mypy/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ def __init__(self) -> None:
self.dump_inference_stats = False
self.dump_build_stats = False
self.enable_incomplete_features = False
self.timing_stats: Optional[str] = None

# -- test options --
# Stop after the semantic analysis phase
Expand Down
10 changes: 6 additions & 4 deletions mypy/test/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import sys

import pytest
from typing import List, Tuple, Set, Optional, Iterator, Any, Dict, NamedTuple, Union
from typing import List, Tuple, Set, Optional, Iterator, Any, Dict, NamedTuple, Union, Pattern

from mypy.test.config import test_data_prefix, test_temp_dir, PREFIX

Expand Down Expand Up @@ -44,7 +44,7 @@ def parse_test_case(case: 'DataDrivenTestCase') -> None:
normalize_output = True

files: List[Tuple[str, str]] = [] # path and contents
output_files: List[Tuple[str, str]] = [] # path and contents for output files
output_files: List[Tuple[str, Union[str, Pattern[str]]]] = [] # output path and contents
output: List[str] = [] # Regular output errors
output2: Dict[int, List[str]] = {} # Output errors for incremental, runs 2+
deleted_paths: Dict[int, Set[str]] = {} # from run number of paths
Expand All @@ -57,13 +57,15 @@ def parse_test_case(case: 'DataDrivenTestCase') -> None:
# optionally followed by lines of text.
item = first_item = test_items[0]
for item in test_items[1:]:
if item.id == 'file' or item.id == 'outfile':
if item.id in {'file', 'outfile', 'outfile-re'}:
# Record an extra file needed for the test case.
assert item.arg is not None
contents = expand_variables('\n'.join(item.data))
file_entry = (join(base_path, item.arg), contents)
if item.id == 'file':
files.append(file_entry)
elif item.id == 'outfile-re':
output_files.append((file_entry[0], re.compile(file_entry[1].rstrip(), re.S)))
else:
output_files.append(file_entry)
elif item.id in ('builtins', 'builtins_py2'):
Expand Down Expand Up @@ -220,7 +222,7 @@ class DataDrivenTestCase(pytest.Item):

# Extra attributes used by some tests.
last_line: int
output_files: List[Tuple[str, str]] # Path and contents for output files
output_files: List[Tuple[str, Union[str, Pattern[str]]]] # Path and contents for output files
deleted_paths: Dict[int, Set[str]] # Mapping run number -> paths
triggered: List[str] # Active triggers (one line per incremental step)

Expand Down
15 changes: 12 additions & 3 deletions mypy/test/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import shutil
import contextlib

from typing import List, Iterable, Dict, Tuple, Callable, Any, Iterator, Union
from typing import List, Iterable, Dict, Tuple, Callable, Any, Iterator, Union, Pattern

from mypy import defaults
import mypy.api as api
Expand Down Expand Up @@ -458,8 +458,17 @@ def check_test_output_files(testcase: DataDrivenTestCase,
'Expected file {} was not produced by test case{}'.format(
path, ' on step %d' % step if testcase.output2 else ''))
with open(path, 'r', encoding='utf8') as output_file:
actual_output_content = output_file.read().splitlines()
normalized_output = normalize_file_output(actual_output_content,
actual_output_content = output_file.read()

if isinstance(expected_content, Pattern):
if expected_content.fullmatch(actual_output_content) is not None:
continue
raise AssertionError(
'Output file {} did not match its expected output pattern\n---\n{}\n---'.format(
path, actual_output_content)
)

normalized_output = normalize_file_output(actual_output_content.splitlines(),
os.path.abspath(test_temp_dir))
# We always normalize things like timestamp, but only handle operating-system
# specific things if requested.
Expand Down
10 changes: 10 additions & 0 deletions mypy/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import hashlib
import io
import shutil
import time

from typing import (
TypeVar, List, Tuple, Optional, Dict, Sequence, Iterable, Container, IO, Callable
Expand Down Expand Up @@ -763,3 +764,12 @@ def is_stub_package_file(file: str) -> bool:

def unnamed_function(name: Optional[str]) -> bool:
return name is not None and name == "_"


# Monotonic reference point used with time_spent_us() below for profiling.
# TODO: replace with uses of perf_counter_ns when support for py3.6 is dropped
# (or when mypy properly handles alternate definitions based on python version check)
time_ref = time.perf_counter


def time_spent_us(t0: float) -> int:
    """Return the whole number of microseconds elapsed since *t0*.

    *t0* must be a reference point obtained from time_ref()
    (i.e. time.perf_counter()).
    """
    elapsed_seconds = time.perf_counter() - t0
    return int(elapsed_seconds * 1e6)
12 changes: 12 additions & 0 deletions test-data/unit/cmdline.test
Original file line number Diff line number Diff line change
Expand Up @@ -1373,3 +1373,15 @@ exclude = (?x)(
1()
[out]
c/cpkg.py:1: error: "int" not callable


[case testCmdlineTimingStats]
# cmd: mypy --timing-stats timing.txt .
[file b/__init__.py]
[file b/c.py]
class C: pass
[outfile-re timing.txt]
.*
b \d+
b\.c \d+
.*

0 comments on commit d48d548

Please sign in to comment.