From 753ca80a4fa96b39161ba956f441c264f65297eb Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Fri, 15 Sep 2023 11:08:07 +0100 Subject: [PATCH 1/7] Speedup `regr_test.py` by running test cases concurrently --- tests/regr_test.py | 180 ++++++++++++++++++++++++++++++++------------- 1 file changed, 130 insertions(+), 50 deletions(-) diff --git a/tests/regr_test.py b/tests/regr_test.py index 6746b2bc7ed8..ba89e769e68a 100755 --- a/tests/regr_test.py +++ b/tests/regr_test.py @@ -4,12 +4,18 @@ from __future__ import annotations import argparse +import concurrent.futures +import multiprocessing import os +import queue import re import shutil import subprocess import sys import tempfile +import threading +from contextlib import ExitStack, suppress +from dataclasses import dataclass from enum import IntEnum from itertools import product from pathlib import Path @@ -24,11 +30,11 @@ get_mypy_req, make_venv, print_error, - print_success_msg, testcase_dir_from_package_name, ) ReturnCode: TypeAlias = int +PrintQueue: TypeAlias = queue.Queue[str] TEST_CASES = "test_cases" VENV_DIR = ".venv" @@ -104,17 +110,17 @@ class Verbosity(IntEnum): ) -def verbose_log(msg: str) -> None: - print(colored("\n" + msg, "blue")) +def verbose_log(msg: str, q: PrintQueue) -> None: + q.put(colored(msg, "blue")) -def setup_testcase_dir(package: PackageInfo, tempdir: Path, new_test_case_dir: Path, verbosity: Verbosity) -> None: +def setup_testcase_dir(package: PackageInfo, tempdir: Path, verbosity: Verbosity, q: PrintQueue) -> None: if verbosity is verbosity.VERBOSE: - verbose_log(f"Setting up testcase dir in {tempdir}") + verbose_log(f"{package.name}: Setting up testcase dir in {tempdir}", q) # --warn-unused-ignores doesn't work for files inside typeshed. # SO, to work around this, we copy the test_cases directory into a TemporaryDirectory, # and run the test cases inside of that. - shutil.copytree(package.test_case_directory, new_test_case_dir) + shutil.copytree(package.test_case_directory, tempdir / TEST_CASES) if package.is_stdlib: return @@ -137,28 +143,23 @@ def setup_testcase_dir(package: PackageInfo, tempdir: Path, new_test_case_dir: P shutil.copytree(Path("stubs", requirement), new_typeshed / "stubs" / requirement) if requirements.external_pkgs: - if verbosity is Verbosity.VERBOSE: - verbose_log(f"Setting up venv in {tempdir / VENV_DIR}") pip_exe = make_venv(tempdir / VENV_DIR).pip_exe - pip_command = [pip_exe, "install", get_mypy_req(), *requirements.external_pkgs] + # Use --no-cache-dir to avoid issues with concurrent read/writes to the cache + pip_command = [pip_exe, "install", get_mypy_req(), *requirements.external_pkgs, "--no-cache-dir"] if verbosity is Verbosity.VERBOSE: - verbose_log(f"{pip_command=}") + verbose_log(f"{package.name}: Setting up venv in {tempdir / VENV_DIR}. {pip_command=}\n", q) try: subprocess.run(pip_command, check=True, capture_output=True, text=True) except subprocess.CalledProcessError as e: - print(e.stderr) + q.put(f"{package.name}\n{e.stderr}") raise def run_testcases( - package: PackageInfo, version: str, platform: str, *, tempdir: Path, verbosity: Verbosity + package: PackageInfo, version: str, platform: str, *, tempdir: Path, verbosity: Verbosity, q: PrintQueue ) -> subprocess.CompletedProcess[str]: env_vars = dict(os.environ) new_test_case_dir = tempdir / TEST_CASES - testcasedir_already_setup = new_test_case_dir.exists() and new_test_case_dir.is_dir() - - if not testcasedir_already_setup: - setup_testcase_dir(package, tempdir=tempdir, new_test_case_dir=new_test_case_dir, verbosity=verbosity) # "--enable-error-code ignore-without-code" is purposefully omitted. # See https://github.com/python/typeshed/pull/8083 @@ -202,39 +203,111 @@ def run_testcases( mypy_command = [python_exe, "-m", "mypy"] + flags if verbosity is Verbosity.VERBOSE: - verbose_log(f"{mypy_command=}") + description = f"{package.name}/{version}/{platform}" + msg = f"{description}: {mypy_command=}\n" if "MYPYPATH" in env_vars: - verbose_log(f"{env_vars['MYPYPATH']=}") + msg += f"{description}: {env_vars['MYPYPATH']=}" else: - verbose_log("MYPYPATH not set") + msg += f"{description}: MYPYPATH not set" + msg += "\n" + verbose_log(msg, q) return subprocess.run(mypy_command, capture_output=True, text=True, env=env_vars) +@dataclass(frozen=True) +class Result: + code: int + command_run: str + stderr: str + stdout: str + test_case_dir: Path + tempdir: Path + + def print_description(self, *, verbosity: Verbosity) -> None: + if self.code: + print(f"{self.command_run}:", end=" ") + print_error("FAILURE\n") + replacements = (str(self.tempdir / TEST_CASES), str(self.test_case_dir)) + if self.stderr: + print_error(self.stderr, fix_path=replacements) + if self.stdout: + print_error(self.stdout, fix_path=replacements) + + def test_testcase_directory( - package: PackageInfo, version: str, platform: str, *, verbosity: Verbosity, tempdir: Path -) -> ReturnCode: - msg = f"Running mypy --platform {platform} --python-version {version} on the " - msg += "standard library test cases..." if package.is_stdlib else f"test cases for {package.name!r}..." + package: PackageInfo, version: str, platform: str, *, verbosity: Verbosity, tempdir: Path, q: PrintQueue +) -> Result: + msg = f"mypy --platform {platform} --python-version {version} on the " + msg += "standard library test cases" if package.is_stdlib else f"test cases for {package.name!r}" if verbosity > Verbosity.QUIET: - print(msg, end=" ", flush=True) - - result = run_testcases(package=package, version=version, platform=platform, tempdir=tempdir, verbosity=verbosity) - - if result.returncode: - if verbosity is Verbosity.QUIET: - # We'll already have printed this if --verbosity QUIET wasn't passed. - # If --verbosity QUIET was passed, only print this if there were errors. - # If there are errors, the output is inscrutable if this isn't printed. - print(msg, end=" ") - print_error("failure\n") - replacements = (str(tempdir / TEST_CASES), str(package.test_case_directory)) - if result.stderr: - print_error(result.stderr, fix_path=replacements) - if result.stdout: - print_error(result.stdout, fix_path=replacements) - elif verbosity > Verbosity.QUIET: - print_success_msg() - return result.returncode + q.put(f"Running {msg}...") + + proc_info = run_testcases(package=package, version=version, platform=platform, tempdir=tempdir, verbosity=verbosity, q=q) + return Result( + code=proc_info.returncode, + command_run=msg, + stderr=proc_info.stderr, + stdout=proc_info.stdout, + test_case_dir=package.test_case_directory, + tempdir=tempdir, + ) + + +def print_queued_messages(ev: threading.Event, q: PrintQueue) -> None: + while not ev.is_set(): + with suppress(queue.Empty): + print(q.get(timeout=0.5), flush=True) + while True: + try: + msg = q.get_nowait() + except queue.Empty: + return + else: + print(msg, flush=True) + + +def concurrently_run_testcases( + stack: ExitStack, + testcase_directories: list[PackageInfo], + verbosity: Verbosity, + platforms_to_test: list[str], + versions_to_test: list[str], +) -> list[Result]: + packageinfo_to_tempdir = { + package_info: Path(stack.enter_context(tempfile.TemporaryDirectory())) for package_info in testcase_directories + } + + with multiprocessing.Manager() as manager: + event = threading.Event() + q: PrintQueue = manager.Queue() + + printer_thread = threading.Thread(target=print_queued_messages, args=(event, q)) + printer_thread.start() + + with concurrent.futures.ProcessPoolExecutor() as executor: + # Each temporary directory may be used by multiple processes concurrently during the next step; + # must make sure that they're all setup correctly before starting the next step, + # in order to avoid race conditions + testcase_futures = [ + executor.submit(setup_testcase_dir, package, tempdir, verbosity, q) + for package, tempdir in packageinfo_to_tempdir.items() + ] + concurrent.futures.wait(testcase_futures) + + mypy_futures = [ + executor.submit( + test_testcase_directory, testcase_dir, version, platform, verbosity=verbosity, tempdir=tempdir, q=q + ) + for (testcase_dir, tempdir), platform, version in product( + packageinfo_to_tempdir.items(), platforms_to_test, versions_to_test + ) + ] + results = [future.result() for future in mypy_futures] + + event.set() + printer_thread.join() + + return results def main() -> ReturnCode: @@ -253,16 +326,23 @@ def main() -> ReturnCode: versions_to_test = args.versions_to_test or [f"3.{sys.version_info[1]}"] code = 0 - for testcase_dir in testcase_directories: - with tempfile.TemporaryDirectory() as td: - tempdir = Path(td) - for platform, version in product(platforms_to_test, versions_to_test): - this_code = test_testcase_directory(testcase_dir, version, platform, verbosity=verbosity, tempdir=tempdir) - code = max(code, this_code) + results: list[Result] | None = None + + with ExitStack() as stack: + results = concurrently_run_testcases(stack, testcase_directories, verbosity, platforms_to_test, versions_to_test) + + assert results is not None + print() + + for result in results: + result.print_description(verbosity=verbosity) + + code = max(result.code for result in results) + if code: - print_error("\nTest completed with errors") + print_error("Test completed with errors") else: - print(colored("\nTest completed successfully!", "green")) + print(colored("Test completed successfully!", "green")) return code From 6db4d908d08b1f6e9e8c0385133412262dd4a39d Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Fri, 15 Sep 2023 15:29:45 +0100 Subject: [PATCH 2/7] Deliberately break some test cases so you can see what it looks like when the test fails --- stubs/requests/@tests/test_cases/check_post.py | 5 ++++- test_cases/stdlib/builtins/check_dict.py | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/stubs/requests/@tests/test_cases/check_post.py b/stubs/requests/@tests/test_cases/check_post.py index d68a484d9c8e..0435da33e070 100644 --- a/stubs/requests/@tests/test_cases/check_post.py +++ b/stubs/requests/@tests/test_cases/check_post.py @@ -27,8 +27,11 @@ # ================================================================================= +fooo = bbbbbbbbbbbbbbbbbbbbbbba + + # Arbitrary iterable -def gen() -> Iterable[bytes]: +def gen() -> Iterable[bytes]: # type: ignore yield b"foo" yield b"bar" diff --git a/test_cases/stdlib/builtins/check_dict.py b/test_cases/stdlib/builtins/check_dict.py index aa920d045cbc..05d4d3b7129c 100644 --- a/test_cases/stdlib/builtins/check_dict.py +++ b/test_cases/stdlib/builtins/check_dict.py @@ -29,6 +29,8 @@ def keys(self) -> Iterable[_KT]: def __getitem__(self, __k: _KT) -> _VT: return self.data[__k] +fooooooooo = baaaaaaaaaaaaaaaaar + kt1: KeysAndGetItem[int, str] = KeysAndGetItem({0: ""}) assert_type(dict(kt1), Dict[int, str]) @@ -47,7 +49,7 @@ def test_iterable_tuple_overload(x: Iterable[tuple[int, str]]) -> dict[int, str] dict(i1, arg="a") # type: ignore i2: Iterable[tuple[str, int]] = [("a", 1), ("b", 2)] -assert_type(dict(i2, arg=1), Dict[str, int]) +assert_type(dict(i2, arg=1), Dict[str, int]) # type: ignore i3: Iterable[str] = ["a.b"] i4: Iterable[bytes] = [b"a.b"] From af193c3b9f927894deff79b9b1c5c9bc48fc18bb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 15 Sep 2023 14:30:30 +0000 Subject: [PATCH 3/7] [pre-commit.ci] auto fixes from pre-commit.com hooks --- test_cases/stdlib/builtins/check_dict.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test_cases/stdlib/builtins/check_dict.py b/test_cases/stdlib/builtins/check_dict.py index 05d4d3b7129c..f501a837d40d 100644 --- a/test_cases/stdlib/builtins/check_dict.py +++ b/test_cases/stdlib/builtins/check_dict.py @@ -29,6 +29,7 @@ def keys(self) -> Iterable[_KT]: def __getitem__(self, __k: _KT) -> _VT: return self.data[__k] + fooooooooo = baaaaaaaaaaaaaaaaar From 3aa9cb4cbf73674fdf2ab474f79e598ec7681b75 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Fri, 15 Sep 2023 15:33:10 +0100 Subject: [PATCH 4/7] Revert "[pre-commit.ci] auto fixes from pre-commit.com hooks" This reverts commit af193c3b9f927894deff79b9b1c5c9bc48fc18bb. --- test_cases/stdlib/builtins/check_dict.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test_cases/stdlib/builtins/check_dict.py b/test_cases/stdlib/builtins/check_dict.py index f501a837d40d..05d4d3b7129c 100644 --- a/test_cases/stdlib/builtins/check_dict.py +++ b/test_cases/stdlib/builtins/check_dict.py @@ -29,7 +29,6 @@ def keys(self) -> Iterable[_KT]: def __getitem__(self, __k: _KT) -> _VT: return self.data[__k] - fooooooooo = baaaaaaaaaaaaaaaaar From 19438a5563cdbd6445b35678a31dc2e450db5838 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Fri, 15 Sep 2023 15:33:23 +0100 Subject: [PATCH 5/7] Revert "Deliberately break some test cases so you can see what it looks like when the test fails" This reverts commit 6db4d908d08b1f6e9e8c0385133412262dd4a39d. --- stubs/requests/@tests/test_cases/check_post.py | 5 +---- test_cases/stdlib/builtins/check_dict.py | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/stubs/requests/@tests/test_cases/check_post.py b/stubs/requests/@tests/test_cases/check_post.py index 0435da33e070..d68a484d9c8e 100644 --- a/stubs/requests/@tests/test_cases/check_post.py +++ b/stubs/requests/@tests/test_cases/check_post.py @@ -27,11 +27,8 @@ # ================================================================================= -fooo = bbbbbbbbbbbbbbbbbbbbbbba - - # Arbitrary iterable -def gen() -> Iterable[bytes]: # type: ignore +def gen() -> Iterable[bytes]: yield b"foo" yield b"bar" diff --git a/test_cases/stdlib/builtins/check_dict.py b/test_cases/stdlib/builtins/check_dict.py index 05d4d3b7129c..aa920d045cbc 100644 --- a/test_cases/stdlib/builtins/check_dict.py +++ b/test_cases/stdlib/builtins/check_dict.py @@ -29,8 +29,6 @@ def keys(self) -> Iterable[_KT]: def __getitem__(self, __k: _KT) -> _VT: return self.data[__k] -fooooooooo = baaaaaaaaaaaaaaaaar - kt1: KeysAndGetItem[int, str] = KeysAndGetItem({0: ""}) assert_type(dict(kt1), Dict[int, str]) @@ -49,7 +47,7 @@ def test_iterable_tuple_overload(x: Iterable[tuple[int, str]]) -> dict[int, str] dict(i1, arg="a") # type: ignore i2: Iterable[tuple[str, int]] = [("a", 1), ("b", 2)] -assert_type(dict(i2, arg=1), Dict[str, int]) # type: ignore +assert_type(dict(i2, arg=1), Dict[str, int]) i3: Iterable[str] = ["a.b"] i4: Iterable[bytes] = [b"a.b"] From d53781fd152b8428a51a693b350fd7f3bf975448 Mon Sep 17 00:00:00 2001 From: AlexWaygood Date: Sat, 23 Sep 2023 15:19:56 +0100 Subject: [PATCH 6/7] Use threading instead --- tests/regr_test.py | 87 +++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 47 deletions(-) diff --git a/tests/regr_test.py b/tests/regr_test.py index ba89e769e68a..ada937a5eff7 100755 --- a/tests/regr_test.py +++ b/tests/regr_test.py @@ -5,7 +5,6 @@ import argparse import concurrent.futures -import multiprocessing import os import queue import re @@ -109,14 +108,16 @@ class Verbosity(IntEnum): ), ) +_PRINT_QUEUE: queue.SimpleQueue[str] = queue.SimpleQueue() -def verbose_log(msg: str, q: PrintQueue) -> None: - q.put(colored(msg, "blue")) +def verbose_log(msg: str) -> None: + _PRINT_QUEUE.put(colored(msg, "blue")) -def setup_testcase_dir(package: PackageInfo, tempdir: Path, verbosity: Verbosity, q: PrintQueue) -> None: + +def setup_testcase_dir(package: PackageInfo, tempdir: Path, verbosity: Verbosity) -> None: if verbosity is verbosity.VERBOSE: - verbose_log(f"{package.name}: Setting up testcase dir in {tempdir}", q) + verbose_log(f"{package.name}: Setting up testcase dir in {tempdir}") # --warn-unused-ignores doesn't work for files inside typeshed. # SO, to work around this, we copy the test_cases directory into a TemporaryDirectory, # and run the test cases inside of that. @@ -147,16 +148,16 @@ def setup_testcase_dir(package: PackageInfo, tempdir: Path, verbosity: Verbosity # Use --no-cache-dir to avoid issues with concurrent read/writes to the cache pip_command = [pip_exe, "install", get_mypy_req(), *requirements.external_pkgs, "--no-cache-dir"] if verbosity is Verbosity.VERBOSE: - verbose_log(f"{package.name}: Setting up venv in {tempdir / VENV_DIR}. {pip_command=}\n", q) + verbose_log(f"{package.name}: Setting up venv in {tempdir / VENV_DIR}. {pip_command=}\n") try: subprocess.run(pip_command, check=True, capture_output=True, text=True) except subprocess.CalledProcessError as e: - q.put(f"{package.name}\n{e.stderr}") + _PRINT_QUEUE.put(f"{package.name}\n{e.stderr}") raise def run_testcases( - package: PackageInfo, version: str, platform: str, *, tempdir: Path, verbosity: Verbosity, q: PrintQueue + package: PackageInfo, version: str, platform: str, *, tempdir: Path, verbosity: Verbosity ) -> subprocess.CompletedProcess[str]: env_vars = dict(os.environ) new_test_case_dir = tempdir / TEST_CASES @@ -210,7 +211,7 @@ def run_testcases( else: msg += f"{description}: MYPYPATH not set" msg += "\n" - verbose_log(msg, q) + verbose_log(msg) return subprocess.run(mypy_command, capture_output=True, text=True, env=env_vars) @@ -234,15 +235,13 @@ def print_description(self, *, verbosity: Verbosity) -> None: print_error(self.stdout, fix_path=replacements) -def test_testcase_directory( - package: PackageInfo, version: str, platform: str, *, verbosity: Verbosity, tempdir: Path, q: PrintQueue -) -> Result: +def test_testcase_directory(package: PackageInfo, version: str, platform: str, *, verbosity: Verbosity, tempdir: Path) -> Result: msg = f"mypy --platform {platform} --python-version {version} on the " msg += "standard library test cases" if package.is_stdlib else f"test cases for {package.name!r}" if verbosity > Verbosity.QUIET: - q.put(f"Running {msg}...") + _PRINT_QUEUE.put(f"Running {msg}...") - proc_info = run_testcases(package=package, version=version, platform=platform, tempdir=tempdir, verbosity=verbosity, q=q) + proc_info = run_testcases(package=package, version=version, platform=platform, tempdir=tempdir, verbosity=verbosity) return Result( code=proc_info.returncode, command_run=msg, @@ -253,13 +252,13 @@ def test_testcase_directory( ) -def print_queued_messages(ev: threading.Event, q: PrintQueue) -> None: +def print_queued_messages(ev: threading.Event) -> None: while not ev.is_set(): with suppress(queue.Empty): - print(q.get(timeout=0.5), flush=True) + print(_PRINT_QUEUE.get(timeout=0.5), flush=True) while True: try: - msg = q.get_nowait() + msg = _PRINT_QUEUE.get_nowait() except queue.Empty: return else: @@ -277,36 +276,30 @@ def concurrently_run_testcases( package_info: Path(stack.enter_context(tempfile.TemporaryDirectory())) for package_info in testcase_directories } - with multiprocessing.Manager() as manager: - event = threading.Event() - q: PrintQueue = manager.Queue() - - printer_thread = threading.Thread(target=print_queued_messages, args=(event, q)) - printer_thread.start() - - with concurrent.futures.ProcessPoolExecutor() as executor: - # Each temporary directory may be used by multiple processes concurrently during the next step; - # must make sure that they're all setup correctly before starting the next step, - # in order to avoid race conditions - testcase_futures = [ - executor.submit(setup_testcase_dir, package, tempdir, verbosity, q) - for package, tempdir in packageinfo_to_tempdir.items() - ] - concurrent.futures.wait(testcase_futures) - - mypy_futures = [ - executor.submit( - test_testcase_directory, testcase_dir, version, platform, verbosity=verbosity, tempdir=tempdir, q=q - ) - for (testcase_dir, tempdir), platform, version in product( - packageinfo_to_tempdir.items(), platforms_to_test, versions_to_test - ) - ] - results = [future.result() for future in mypy_futures] - - event.set() - printer_thread.join() - + event = threading.Event() + printer_thread = threading.Thread(target=print_queued_messages, args=(event,)) + printer_thread.start() + + with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor: + # Each temporary directory may be used by multiple processes concurrently during the next step; + # must make sure that they're all setup correctly before starting the next step, + # in order to avoid race conditions + testcase_futures = [ + executor.submit(setup_testcase_dir, package, tempdir, verbosity) + for package, tempdir in packageinfo_to_tempdir.items() + ] + concurrent.futures.wait(testcase_futures) + + mypy_futures = [ + executor.submit(test_testcase_directory, testcase_dir, version, platform, verbosity=verbosity, tempdir=tempdir) + for (testcase_dir, tempdir), platform, version in product( + packageinfo_to_tempdir.items(), platforms_to_test, versions_to_test + ) + ] + results = [future.result() for future in mypy_futures] + + event.set() + printer_thread.join() return results From e863f98330f88b51e47e041e05a94340a5b93738 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Sat, 23 Sep 2023 15:20:56 +0100 Subject: [PATCH 7/7] don't need that any more --- tests/regr_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/regr_test.py b/tests/regr_test.py index ada937a5eff7..f7a2c214015f 100755 --- a/tests/regr_test.py +++ b/tests/regr_test.py @@ -33,7 +33,6 @@ ) ReturnCode: TypeAlias = int -PrintQueue: TypeAlias = queue.Queue[str] TEST_CASES = "test_cases" VENV_DIR = ".venv"