From aca0d174dd38c292d41808e839e8a36c30ccd0ee Mon Sep 17 00:00:00 2001 From: Eric Arellano <14852634+Eric-Arellano@users.noreply.github.com> Date: Mon, 22 Apr 2024 13:55:02 -0400 Subject: [PATCH] Use asyncio to parallelize notebook testing (#1201) An alternative to https://github.com/Qiskit/documentation/pull/1181. This should help when executing many notebooks that submit jobs as they'll all submit their jobs immediately and begin queueing in parallel. --------- Co-authored-by: Frank Harkins --- scripts/nb-tester/test-notebook.py | 49 ++++++++++++++---------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/scripts/nb-tester/test-notebook.py b/scripts/nb-tester/test-notebook.py index 9ce5dbdf454..3d2f1de5448 100644 --- a/scripts/nb-tester/test-notebook.py +++ b/scripts/nb-tester/test-notebook.py @@ -11,6 +11,7 @@ # been altered from the originals. import argparse +import asyncio import sys import textwrap from dataclasses import dataclass @@ -81,11 +82,11 @@ class NotebookWarning: cell_index: int msg: str - def report(self): + def format(self) -> str: """ - Format warning and print it + Format warning to pretty string """ - message = f"Warning detected in cell {self.cell_index}:\n" + message = f"Cell {self.cell_index}:\n" for line in self.msg.splitlines(): message += ( textwrap.fill( @@ -93,14 +94,7 @@ def report(self): ) + "\n" ) - print_yellow(message, flush=True) - - -def print_yellow(s: str, **kwargs): - """ - Use ANSI escape codes to print yellow text - """ - print(f"\033[0;33m{s}\033[0m", **kwargs) + return message def extract_warnings(notebook: nbformat.NotebookNode) -> list[NotebookWarning]: @@ -121,33 +115,34 @@ def extract_warnings(notebook: nbformat.NotebookNode) -> list[NotebookWarning]: return notebook_warnings -def execute_notebook(path: Path, args: argparse.Namespace) -> bool: +async def execute_notebook(path: Path, args: argparse.Namespace) -> bool: """ Wrapper function for `_execute_notebook` to print status """ - print(f"▶️ {path}", end="", flush=True) + print(f"▶️ Executing {path}") possible_exceptions = ( nbconvert.preprocessors.CellExecutionError, nbclient.exceptions.CellTimeoutError, ) try: - nb = _execute_notebook(path, args) + nb = await _execute_notebook(path, args) except possible_exceptions as err: - print("\r❌\n") - print(err) + print(f"❌ Problem in {path}:\n{err}") return False notebook_warnings = extract_warnings(nb) if notebook_warnings: - print("\r⚠️") - [w.report() for w in notebook_warnings] + print( + f"⚠️ Warnings in {path}:\n" + + "\n".join((w.format() for w in notebook_warnings)) + ) return False - print("\r✅") + print(f"✅ No problems in {path}") return True -def _execute_notebook(filepath: Path, args: argparse.Namespace) -> nbformat.NotebookNode: +async def _execute_notebook(filepath: Path, args: argparse.Namespace) -> nbformat.NotebookNode: """ Use nbconvert to execute notebook. """ @@ -156,12 +151,14 @@ def _execute_notebook(filepath: Path, args: argparse.Namespace) -> nbformat.Note processor = nbconvert.preprocessors.ExecutePreprocessor( # If submitting jobs, we want to wait forever (-1 means no timeout) - timeout=-1 if submit_jobs else 100, + timeout=-1 if submit_jobs else 300, kernel_name="python3", extra_arguments=["--InlineBackend.figure_format='svg'"] ) - processor.preprocess(nb) + # This runs the notebook, including possibly submitting jobs. We run it in a + # new thread to avoid blocking other notebooks from submitting jobs. + await asyncio.to_thread(processor.preprocess, nb) if not args.write: return nb @@ -191,7 +188,7 @@ def cancel_trailing_jobs(start_time: datetime) -> bool: """ Cancel any runtime jobs created after `start_time`. - Return True if non exist, False otherwise. + Return True if none exist, False otherwise. Notebooks should not submit jobs during a normal test run. If they do, the cell will time out and this function will cancel the job to avoid wasting @@ -255,7 +252,7 @@ def create_argument_parser() -> argparse.ArgumentParser: return parser -def main() -> None: +async def main() -> None: args = create_argument_parser().parse_args() paths = map(Path, args.filenames or find_notebooks()) filtered_paths = filter_paths(paths, args) @@ -263,7 +260,7 @@ def main() -> None: # Execute notebooks start_time = datetime.now() print("Executing notebooks:") - results = [execute_notebook(path, args) for path in filtered_paths] + results = await asyncio.gather(*(execute_notebook(path, args) for path in filtered_paths)) print("Checking for trailing jobs...") results.append(cancel_trailing_jobs(start_time)) if not all(results): @@ -272,4 +269,4 @@ def main() -> None: if __name__ == "__main__": - main() + asyncio.run(main())