From e191a323087dcad796d5264f0d9d66a87a9e3b1c Mon Sep 17 00:00:00 2001 From: Hugues Bruant Date: Thu, 21 Apr 2022 01:43:43 -0700 Subject: [PATCH 1/5] introduce per-file timing-stats When profiling mypy over a large codebase, it can be useful to know which files are slowest to typecheck. Gather per-file timing stats and expose them through a new (hidden) command line switch --- mypy/build.py | 31 +++++++++++++++++++++++++++++++ mypy/main.py | 3 +++ mypy/options.py | 1 + 3 files changed, 35 insertions(+) diff --git a/mypy/build.py b/mypy/build.py index f7a9e9e05e1d..812e6528625a 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -256,6 +256,8 @@ def _build(sources: List[BuildSource], graph = dispatch(sources, manager, stdout) if not options.fine_grained_incremental: TypeState.reset_all_subtype_caches() + if options.timing_stats is not None: + dump_timing_stats(options.timing_stats, graph) return BuildResult(manager, graph) finally: t0 = time.time() @@ -1808,6 +1810,9 @@ class State: fine_grained_deps_loaded = False + # Cumulative time spent on this file (for profiling stats) + time_spent: int = 0 + def __init__(self, id: Optional[str], path: Optional[str], @@ -2034,6 +2039,8 @@ def parse_file(self) -> None: else: manager.log("Using cached AST for %s (%s)" % (self.xpath, self.id)) + t0 = time.perf_counter_ns() + with self.wrap_context(): source = self.source self.source = None # We won't need it again. @@ -2079,6 +2086,8 @@ def parse_file(self) -> None: self.tree.ignored_lines, self.ignore_all or self.options.ignore_errors) + self.time_spent += time.perf_counter_ns() - t0 + if not cached: # Make a copy of any errors produced during parse time so that # fine-grained mode can repeat them when the module is @@ -2113,6 +2122,9 @@ def semantic_analysis_pass1(self) -> None: """ options = self.options assert self.tree is not None + + t0 = time.perf_counter_ns() + # Do the first pass of semantic analysis: analyze the reachability # of blocks and import statements. 
We must do this before # processing imports, since this may mark some import statements as @@ -2131,6 +2143,7 @@ def semantic_analysis_pass1(self) -> None: if options.allow_redefinition: # Perform more renaming across the AST to allow variable redefinitions self.tree.accept(VariableRenameVisitor()) + self.time_spent += time.perf_counter_ns() - t0 def add_dependency(self, dep: str) -> None: if dep not in self.dependencies_set: @@ -2188,8 +2201,10 @@ def compute_dependencies(self) -> None: def type_check_first_pass(self) -> None: if self.options.semantic_analysis_only: return + t0 = time.perf_counter_ns() with self.wrap_context(): self.type_checker().check_first_pass() + self.time_spent += time.perf_counter_ns() - t0 def type_checker(self) -> TypeChecker: if not self._type_checker: @@ -2207,14 +2222,17 @@ def type_map(self) -> Dict[Expression, Type]: def type_check_second_pass(self) -> bool: if self.options.semantic_analysis_only: return False + t0 = time.perf_counter_ns() with self.wrap_context(): return self.type_checker().check_second_pass() + self.time_spent += time.perf_counter_ns() - t0 def finish_passes(self) -> None: assert self.tree is not None, "Internal error: method must be called on parsed file only" manager = self.manager if self.options.semantic_analysis_only: return + t0 = time.perf_counter_ns() with self.wrap_context(): # Some tests (and tools) want to look at the set of all types. 
options = manager.options @@ -2237,6 +2255,7 @@ def finish_passes(self) -> None: self.free_state() if not manager.options.fine_grained_incremental and not manager.options.preserve_asts: free_tree(self.tree) + self.time_spent += time.perf_counter_ns() - t0 def free_state(self) -> None: if self._type_checker: @@ -2771,6 +2790,16 @@ def dumps(self) -> str: json.dumps(self.deps)) +def dump_timing_stats(path: str, graph: Graph) -> None: + """ + Dump timing stats for each file in the given graph + """ + with open(path, 'w') as f: + for k in sorted(graph.keys()): + v = graph[k] + f.write('{} {}\n'.format(v.id, v.time_spent)) + + def dump_graph(graph: Graph, stdout: Optional[TextIO] = None) -> None: """Dump the graph as a JSON string to stdout. @@ -3091,6 +3120,8 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: manager.log("No fresh SCCs left in queue") + + def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]: """Come up with the ideal processing order within an SCC. diff --git a/mypy/main.py b/mypy/main.py index c4548ea9b625..7c7a5993d5c5 100644 --- a/mypy/main.py +++ b/mypy/main.py @@ -835,6 +835,9 @@ def add_invertible_flag(flag: str, parser.add_argument( '--dump-build-stats', action='store_true', help=argparse.SUPPRESS) + # dump timing stats for each processed file into the given output file + parser.add_argument( + '--timing-stats', dest='timing_stats', help=argparse.SUPPRESS) # --debug-cache will disable any cache-related compressions/optimizations, # which will make the cache writing process output pretty-printed JSON (which # is easier to debug). 
diff --git a/mypy/options.py b/mypy/options.py index 8e56d67bbeb8..4cb2434958f4 100644 --- a/mypy/options.py +++ b/mypy/options.py @@ -263,6 +263,7 @@ def __init__(self) -> None: self.dump_inference_stats = False self.dump_build_stats = False self.enable_incomplete_features = False + self.timing_stats: Optional[str] = None # -- test options -- # Stop after the semantic analysis phase From 6d0975964db7248d5b08efc5d239a16ba5e43be6 Mon Sep 17 00:00:00 2001 From: Hugues Bruant Date: Fri, 22 Apr 2022 02:35:49 -0700 Subject: [PATCH 2/5] address comments and use py3.6 compatible timer --- mypy/build.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 812e6528625a..4240c057309f 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1810,8 +1810,8 @@ class State: fine_grained_deps_loaded = False - # Cumulative time spent on this file (for profiling stats) - time_spent: int = 0 + # Cumulative time spent on this file, in microseconds (for profiling stats) + time_spent_us: int = 0 def __init__(self, id: Optional[str], @@ -2039,7 +2039,7 @@ def parse_file(self) -> None: else: manager.log("Using cached AST for %s (%s)" % (self.xpath, self.id)) - t0 = time.perf_counter_ns() + t0 = time.perf_counter() with self.wrap_context(): source = self.source @@ -2086,7 +2086,7 @@ def parse_file(self) -> None: self.tree.ignored_lines, self.ignore_all or self.options.ignore_errors) - self.time_spent += time.perf_counter_ns() - t0 + self.time_spent_us += int((time.perf_counter() - t0) * 1e6) if not cached: # Make a copy of any errors produced during parse time so that @@ -2123,7 +2123,7 @@ def semantic_analysis_pass1(self) -> None: options = self.options assert self.tree is not None - t0 = time.perf_counter_ns() + t0 = time.perf_counter() # Do the first pass of semantic analysis: analyze the reachability # of blocks and import statements. 
We must do this before @@ -2143,7 +2143,7 @@ def semantic_analysis_pass1(self) -> None: if options.allow_redefinition: # Perform more renaming across the AST to allow variable redefinitions self.tree.accept(VariableRenameVisitor()) - self.time_spent += time.perf_counter_ns() - t0 + self.time_spent_us += int((time.perf_counter() - t0) * 1e6) def add_dependency(self, dep: str) -> None: if dep not in self.dependencies_set: @@ -2201,10 +2201,10 @@ def compute_dependencies(self) -> None: def type_check_first_pass(self) -> None: if self.options.semantic_analysis_only: return - t0 = time.perf_counter_ns() + t0 = time.perf_counter() with self.wrap_context(): self.type_checker().check_first_pass() - self.time_spent += time.perf_counter_ns() - t0 + self.time_spent_us += int((time.perf_counter() - t0) * 1e6) def type_checker(self) -> TypeChecker: if not self._type_checker: @@ -2222,17 +2222,17 @@ def type_map(self) -> Dict[Expression, Type]: def type_check_second_pass(self) -> bool: if self.options.semantic_analysis_only: return False - t0 = time.perf_counter_ns() + t0 = time.perf_counter() with self.wrap_context(): return self.type_checker().check_second_pass() - self.time_spent += time.perf_counter_ns() - t0 + self.time_spent_us += int((time.perf_counter() - t0) * 1e6) def finish_passes(self) -> None: assert self.tree is not None, "Internal error: method must be called on parsed file only" manager = self.manager if self.options.semantic_analysis_only: return - t0 = time.perf_counter_ns() + t0 = time.perf_counter() with self.wrap_context(): # Some tests (and tools) want to look at the set of all types. 
options = manager.options @@ -2255,7 +2255,7 @@ def finish_passes(self) -> None: self.free_state() if not manager.options.fine_grained_incremental and not manager.options.preserve_asts: free_tree(self.tree) - self.time_spent += time.perf_counter_ns() - t0 + self.time_spent_us += int((time.perf_counter() - t0) * 1e6) def free_state(self) -> None: if self._type_checker: @@ -2797,7 +2797,7 @@ def dump_timing_stats(path: str, graph: Graph) -> None: with open(path, 'w') as f: for k in sorted(graph.keys()): v = graph[k] - f.write('{} {}\n'.format(v.id, v.time_spent)) + f.write('{} {}\n'.format(v.id, v.time_spent_us)) def dump_graph(graph: Graph, stdout: Optional[TextIO] = None) -> None: @@ -3120,8 +3120,6 @@ def process_graph(graph: Graph, manager: BuildManager) -> None: manager.log("No fresh SCCs left in queue") - - def order_ascc(graph: Graph, ascc: AbstractSet[str], pri_max: int = PRI_ALL) -> List[str]: """Come up with the ideal processing order within an SCC. From 30f19d95e693711cf1e8d5947ca8defeafaa4e0a Mon Sep 17 00:00:00 2001 From: Hugues Bruant Date: Fri, 22 Apr 2022 02:36:17 -0700 Subject: [PATCH 3/5] add test --- mypy/test/data.py | 10 ++++++---- mypy/test/helpers.py | 15 ++++++++++++--- test-data/unit/cmdline.test | 12 ++++++++++++ 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/mypy/test/data.py b/mypy/test/data.py index e886b11ffa8e..e18ac142d15e 100644 --- a/mypy/test/data.py +++ b/mypy/test/data.py @@ -10,7 +10,7 @@ import sys import pytest -from typing import List, Tuple, Set, Optional, Iterator, Any, Dict, NamedTuple, Union +from typing import List, Tuple, Set, Optional, Iterator, Any, Dict, NamedTuple, Union, Pattern from mypy.test.config import test_data_prefix, test_temp_dir, PREFIX @@ -44,7 +44,7 @@ def parse_test_case(case: 'DataDrivenTestCase') -> None: normalize_output = True files: List[Tuple[str, str]] = [] # path and contents - output_files: List[Tuple[str, str]] = [] # path and contents for output files + output_files: 
List[Tuple[str, Union[str, Pattern[str]]]] = [] # output path and contents output: List[str] = [] # Regular output errors output2: Dict[int, List[str]] = {} # Output errors for incremental, runs 2+ deleted_paths: Dict[int, Set[str]] = {} # from run number of paths @@ -57,13 +57,15 @@ def parse_test_case(case: 'DataDrivenTestCase') -> None: # optionally followed by lines of text. item = first_item = test_items[0] for item in test_items[1:]: - if item.id == 'file' or item.id == 'outfile': + if item.id in {'file', 'outfile', 'outfile-re'}: # Record an extra file needed for the test case. assert item.arg is not None contents = expand_variables('\n'.join(item.data)) file_entry = (join(base_path, item.arg), contents) if item.id == 'file': files.append(file_entry) + elif item.id == 'outfile-re': + output_files.append((file_entry[0], re.compile(file_entry[1].rstrip(), re.S))) else: output_files.append(file_entry) elif item.id in ('builtins', 'builtins_py2'): @@ -220,7 +222,7 @@ class DataDrivenTestCase(pytest.Item): # Extra attributes used by some tests. 
last_line: int - output_files: List[Tuple[str, str]] # Path and contents for output files + output_files: List[Tuple[str, Union[str, Pattern[str]]]] # Path and contents for output files deleted_paths: Dict[int, Set[str]] # Mapping run number -> paths triggered: List[str] # Active triggers (one line per incremental step) diff --git a/mypy/test/helpers.py b/mypy/test/helpers.py index f9f117634c21..19482163ea81 100644 --- a/mypy/test/helpers.py +++ b/mypy/test/helpers.py @@ -5,7 +5,7 @@ import shutil import contextlib -from typing import List, Iterable, Dict, Tuple, Callable, Any, Iterator, Union +from typing import List, Iterable, Dict, Tuple, Callable, Any, Iterator, Union, Pattern from mypy import defaults import mypy.api as api @@ -458,8 +458,17 @@ def check_test_output_files(testcase: DataDrivenTestCase, 'Expected file {} was not produced by test case{}'.format( path, ' on step %d' % step if testcase.output2 else '')) with open(path, 'r', encoding='utf8') as output_file: - actual_output_content = output_file.read().splitlines() - normalized_output = normalize_file_output(actual_output_content, + actual_output_content = output_file.read() + + if isinstance(expected_content, Pattern): + if expected_content.fullmatch(actual_output_content) is not None: + continue + raise AssertionError( + 'Output file {} did not match its expected output pattern\n---\n{}\n---\n{}\n---'.format( + path, actual_output_content, expected_content) + ) + + normalized_output = normalize_file_output(actual_output_content.splitlines(), os.path.abspath(test_temp_dir)) # We always normalize things like timestamp, but only handle operating-system # specific things if requested. 
diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test index 1a038b9fac09..7fc517643342 100644 --- a/test-data/unit/cmdline.test +++ b/test-data/unit/cmdline.test @@ -1373,3 +1373,15 @@ exclude = (?x)( 1() [out] c/cpkg.py:1: error: "int" not callable + + +[case testCmdlineTimingStats] +# cmd: mypy --timing-stats timing.txt . +[file b/__init__.py] +[file b/c.py] +class C: pass +[outfile-re timing.txt] +.* +b \d+ +b\.c \d+ +.* From e839c6784568a9c5647ef652fead3e82d95bb483 Mon Sep 17 00:00:00 2001 From: Hugues Bruant Date: Fri, 22 Apr 2022 02:39:26 -0700 Subject: [PATCH 4/5] fix flake8 --- mypy/test/helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypy/test/helpers.py b/mypy/test/helpers.py index 19482163ea81..5046c46eaa43 100644 --- a/mypy/test/helpers.py +++ b/mypy/test/helpers.py @@ -464,8 +464,8 @@ def check_test_output_files(testcase: DataDrivenTestCase, if expected_content.fullmatch(actual_output_content) is not None: continue raise AssertionError( - 'Output file {} did not match its expected output pattern\n---\n{}\n---\n{}\n---'.format( - path, actual_output_content, expected_content) + 'Output file {} did not match its expected output pattern\n---\n{}\n---'.format( + path, actual_output_content) ) normalized_output = normalize_file_output(actual_output_content.splitlines(), From e2ce6c78ec3ef8b55050cd68d2ad826d98b87386 Mon Sep 17 00:00:00 2001 From: Hugues Bruant Date: Wed, 27 Apr 2022 12:33:42 -0700 Subject: [PATCH 5/5] Factor out time_spent computation --- mypy/build.py | 23 ++++++++++++----------- mypy/util.py | 10 ++++++++++ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index 4240c057309f..f084e632417a 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -36,7 +36,8 @@ from mypy.errors import Errors, CompileError, ErrorInfo, report_internal_error from mypy.util import ( DecodeError, decode_python_encoding, is_sub_path, get_mypy_comments, module_prefix, - 
read_py_file, hash_digest, is_typeshed_file, is_stub_package_file, get_top_two_prefixes + read_py_file, hash_digest, is_typeshed_file, is_stub_package_file, get_top_two_prefixes, + time_ref, time_spent_us ) if TYPE_CHECKING: from mypy.report import Reports # Avoid unconditional slow import @@ -2039,7 +2040,7 @@ def parse_file(self) -> None: else: manager.log("Using cached AST for %s (%s)" % (self.xpath, self.id)) - t0 = time.perf_counter() + t0 = time_ref() with self.wrap_context(): source = self.source @@ -2086,7 +2087,7 @@ def parse_file(self) -> None: self.tree.ignored_lines, self.ignore_all or self.options.ignore_errors) - self.time_spent_us += int((time.perf_counter() - t0) * 1e6) + self.time_spent_us += time_spent_us(t0) if not cached: # Make a copy of any errors produced during parse time so that @@ -2123,7 +2124,7 @@ def semantic_analysis_pass1(self) -> None: options = self.options assert self.tree is not None - t0 = time.perf_counter() + t0 = time_ref() # Do the first pass of semantic analysis: analyze the reachability # of blocks and import statements. 
We must do this before @@ -2143,7 +2144,7 @@ def semantic_analysis_pass1(self) -> None: if options.allow_redefinition: # Perform more renaming across the AST to allow variable redefinitions self.tree.accept(VariableRenameVisitor()) - self.time_spent_us += int((time.perf_counter() - t0) * 1e6) + self.time_spent_us += time_spent_us(t0) def add_dependency(self, dep: str) -> None: if dep not in self.dependencies_set: @@ -2201,10 +2202,10 @@ def compute_dependencies(self) -> None: def type_check_first_pass(self) -> None: if self.options.semantic_analysis_only: return - t0 = time.perf_counter() + t0 = time_ref() with self.wrap_context(): self.type_checker().check_first_pass() - self.time_spent_us += int((time.perf_counter() - t0) * 1e6) + self.time_spent_us += time_spent_us(t0) def type_checker(self) -> TypeChecker: if not self._type_checker: @@ -2222,17 +2223,17 @@ def type_map(self) -> Dict[Expression, Type]: def type_check_second_pass(self) -> bool: if self.options.semantic_analysis_only: return False - t0 = time.perf_counter() + t0 = time_ref() with self.wrap_context(): return self.type_checker().check_second_pass() - self.time_spent_us += int((time.perf_counter() - t0) * 1e6) + self.time_spent_us += time_spent_us(t0) def finish_passes(self) -> None: assert self.tree is not None, "Internal error: method must be called on parsed file only" manager = self.manager if self.options.semantic_analysis_only: return - t0 = time.perf_counter() + t0 = time_ref() with self.wrap_context(): # Some tests (and tools) want to look at the set of all types. 
options = manager.options @@ -2255,7 +2256,7 @@ def finish_passes(self) -> None: self.free_state() if not manager.options.fine_grained_incremental and not manager.options.preserve_asts: free_tree(self.tree) - self.time_spent_us += int((time.perf_counter() - t0) * 1e6) + self.time_spent_us += time_spent_us(t0) def free_state(self) -> None: if self._type_checker: diff --git a/mypy/util.py b/mypy/util.py index c02e5dcfc9b4..1a3628458c48 100644 --- a/mypy/util.py +++ b/mypy/util.py @@ -8,6 +8,7 @@ import hashlib import io import shutil +import time from typing import ( TypeVar, List, Tuple, Optional, Dict, Sequence, Iterable, Container, IO, Callable @@ -763,3 +764,12 @@ def is_stub_package_file(file: str) -> bool: def unnamed_function(name: Optional[str]) -> bool: return name is not None and name == "_" + + +# TODO: replace with uses of perf_counter_ns when support for py3.6 is dropped +# (or when mypy properly handles alternate definitions based on python version check) +time_ref = time.perf_counter + + +def time_spent_us(t0: float) -> int: + return int((time.perf_counter() - t0) * 1e6)