From a130498451ce7ae7d8525abb6168cb6e5f82204d Mon Sep 17 00:00:00 2001
From: Max Mynter
Date: Fri, 8 Mar 2024 17:47:22 +0100
Subject: [PATCH 1/5] Add Parameters to Benchmarks

Add the parameters to a 'BenchmarkRecord' by appending them to the results.
Omit the type information from the `Variable` object and only retain
variable name and value.
---
 src/nnbench/runner.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/nnbench/runner.py b/src/nnbench/runner.py
index 1dacccb7..4a0a44df 100644
--- a/src/nnbench/runner.py
+++ b/src/nnbench/runner.py
@@ -89,7 +89,9 @@ def _canonicalize(t: type) -> type:
             )

         if typ == empty:
-            logger.debug(f"parameter {name!r} untyped in benchmark {bm.fn.__name__}().")
+            logger.debug(
+                f"parameter {name!r} untyped in benchmark {bm.fn.__name__}()."
+            )

         if name in allvars:
             currvar = allvars[name]
@@ -134,7 +136,9 @@ def clear(self) -> None:
         """Clear all registered benchmarks."""
         self.benchmarks.clear()

-    def collect(self, path_or_module: str | os.PathLike[str], tags: tuple[str, ...] = ()) -> None:
+    def collect(
+        self, path_or_module: str | os.PathLike[str], tags: tuple[str, ...] = ()
+    ) -> None:
         # TODO: functools.cache this guy
         """
         Discover benchmarks in a module and memoize them for later use.
@@ -226,7 +230,9 @@ def run(

         # if we still have no benchmarks after collection, warn and return an empty record.
         if not self.benchmarks:
-            warnings.warn(f"No benchmarks found in path/module {str(path_or_module)!r}.")
+            warnings.warn(
+                f"No benchmarks found in path/module {str(path_or_module)!r}."
+            )
             return BenchmarkRecord(context=Context(), benchmarks=[])

         params = params or {}
@@ -254,7 +260,10 @@ def run(

         results: list[dict[str, Any]] = []
         for benchmark in self.benchmarks:
-            bmparams = {k: v for k, v in dparams.items() if k in benchmark.interface.names}
+            bmparams = {
+                k: v for k, v in dparams.items() if k in benchmark.interface.names
+            }
+            bmdefaults = {k: v for (k, t, v) in benchmark.interface.variables}
             # TODO: Wrap this into an execution context
             res: dict[str, Any] = {
                 "name": benchmark.name,
@@ -263,6 +272,7 @@ def run(
                 "date": datetime.now().isoformat(timespec="seconds"),
                 "error_occurred": False,
                 "error_message": "",
+                "parameters": {**bmdefaults, **bmparams},
             }
             try:
                 benchmark.setUp(**bmparams)

From bf72378ff1b9ee607a14256861d5eb878be705c3 Mon Sep 17 00:00:00 2001
From: Max Mynter
Date: Fri, 8 Mar 2024 18:10:19 +0100
Subject: [PATCH 2/5] Update Readme to include parameter summary

---
 README.md             | 10 +++++-----
 src/nnbench/runner.py | 16 ++++------------
 2 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 3b39b816..afcf421d 100644
--- a/README.md
+++ b/README.md
@@ -49,11 +49,11 @@ record = runner.run("__main__", params={"a": 2, "b": 10})
 rep = nnbench.BenchmarkReporter()
 rep.display(record)  # ...and print the results to the terminal.
-# results in a table like the following:
-# name     function    date                   value    time_ns
-# -------  ----------  -------------------  -------  ---------
-# product  product     2024-03-07T10:14:21       20       1000
-# power    power       2024-03-07T10:14:21     1024        750
+# results in a table like the following:
+# name     function    date                 parameters           value    time_ns
+# -------  ----------  -------------------  -----------------  -------  ---------
+# product  product     2024-03-08T18:03:48  {'a': 2, 'b': 10}       20       1000
+# power    power       2024-03-08T18:03:48  {'a': 2, 'b': 10}     1024        750
 ```

 For a more realistic example of how to evaluate a trained model with a benchmark suite, check the [Quickstart](https://aai-institute.github.io/nnbench/latest/quickstart/).
diff --git a/src/nnbench/runner.py b/src/nnbench/runner.py
index 4a0a44df..793b60c2 100644
--- a/src/nnbench/runner.py
+++ b/src/nnbench/runner.py
@@ -89,9 +89,7 @@ def _canonicalize(t: type) -> type:
             )

         if typ == empty:
-            logger.debug(
-                f"parameter {name!r} untyped in benchmark {bm.fn.__name__}()."
-            )
+            logger.debug(f"parameter {name!r} untyped in benchmark {bm.fn.__name__}().")

         if name in allvars:
             currvar = allvars[name]
@@ -136,9 +134,7 @@ def clear(self) -> None:
         """Clear all registered benchmarks."""
         self.benchmarks.clear()

-    def collect(
-        self, path_or_module: str | os.PathLike[str], tags: tuple[str, ...] = ()
-    ) -> None:
+    def collect(self, path_or_module: str | os.PathLike[str], tags: tuple[str, ...] = ()) -> None:
         # TODO: functools.cache this guy
         """
         Discover benchmarks in a module and memoize them for later use.
@@ -230,9 +226,7 @@ def run(

         # if we still have no benchmarks after collection, warn and return an empty record.
         if not self.benchmarks:
-            warnings.warn(
-                f"No benchmarks found in path/module {str(path_or_module)!r}."
-            )
+            warnings.warn(f"No benchmarks found in path/module {str(path_or_module)!r}.")
             return BenchmarkRecord(context=Context(), benchmarks=[])

         params = params or {}
@@ -260,9 +254,7 @@ def run(
         results: list[dict[str, Any]] = []
         for benchmark in self.benchmarks:
-            bmparams = {
-                k: v for k, v in dparams.items() if k in benchmark.interface.names
-            }
+            bmparams = {k: v for k, v in dparams.items() if k in benchmark.interface.names}
             bmdefaults = {k: v for (k, t, v) in benchmark.interface.variables}
             # TODO: Wrap this into an execution context
             res: dict[str, Any] = {
                 "name": benchmark.name,

From 70dde070944ae8b78208e1fbd625d50fa0a5278a Mon Sep 17 00:00:00 2001
From: Max Mynter
Date: Fri, 8 Mar 2024 18:33:42 +0100
Subject: [PATCH 3/5] Add test for filtering via parameters

---
 tests/benchmarks/parametrized.py |  6 ++++++
 tests/test_runner.py             | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 tests/benchmarks/parametrized.py

diff --git a/tests/benchmarks/parametrized.py b/tests/benchmarks/parametrized.py
new file mode 100644
index 00000000..a3af24b4
--- /dev/null
+++ b/tests/benchmarks/parametrized.py
@@ -0,0 +1,6 @@
+import nnbench
+
+
+@nnbench.parametrize([{"a": 1}, {"a": 2}], tags=("parametrized",))
+def double(a: int) -> int:
+    return 2 * a
diff --git a/tests/test_runner.py b/tests/test_runner.py
index 2fa4a38d..41766899 100644
--- a/tests/test_runner.py
+++ b/tests/test_runner.py
@@ -71,3 +71,21 @@ def duplicate_context_provider() -> dict[str, str]:
         params={"x": 1, "y": 1},
         context=context_providers,
     )
+
+
+def test_filter_benchmarks_on_params(testfolder: str) -> None:
+    r = nnbench.BenchmarkRunner()
+    results = r.run(testfolder, tags=("parametrized",))
+    print(results)
+    assert len(results.benchmarks) == 2
+    assert (
+        len(
+            list(
+                filter(
+                    lambda bm: bm["parameters"]["a"] == 1,
bm["parameters"]["a"] == 1, + results.benchmarks, + ) + ) + ) + == 1 + ) From 525ddea64ead8065dfe6b12553c924f8a61f1395 Mon Sep 17 00:00:00 2001 From: Max Mynter <32773644+maxmynter@users.noreply.github.com> Date: Fri, 8 Mar 2024 20:11:11 +0100 Subject: [PATCH 4/5] Update src/nnbench/runner.py Co-authored-by: Nicholas Junge --- src/nnbench/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nnbench/runner.py b/src/nnbench/runner.py index 793b60c2..c7bea5d5 100644 --- a/src/nnbench/runner.py +++ b/src/nnbench/runner.py @@ -255,7 +255,7 @@ def run( results: list[dict[str, Any]] = [] for benchmark in self.benchmarks: bmparams = {k: v for k, v in dparams.items() if k in benchmark.interface.names} - bmdefaults = {k: v for (k, t, v) in benchmark.interface.variables} + bmdefaults = {k: v for (k, _, v) in benchmark.interface.variables} # TODO: Wrap this into an execution context res: dict[str, Any] = { "name": benchmark.name, From 0c0a107fd462759660b3a1377dfdae02881400c0 Mon Sep 17 00:00:00 2001 From: Nicholas Junge Date: Sun, 10 Mar 2024 20:41:06 +0100 Subject: [PATCH 5/5] Inline benchmark into test, use update syntax on dict What's left is to decide what to do with parameter types that aren't as easily ingestible into a sink as builtin datatypes. --- src/nnbench/runner.py | 2 +- tests/benchmarks/parametrized.py | 6 ------ tests/test_runner.py | 25 +++++++++++-------------- 3 files changed, 12 insertions(+), 21 deletions(-) delete mode 100644 tests/benchmarks/parametrized.py diff --git a/src/nnbench/runner.py b/src/nnbench/runner.py index c7bea5d5..d3417b58 100644 --- a/src/nnbench/runner.py +++ b/src/nnbench/runner.py @@ -264,7 +264,7 @@ def run( "date": datetime.now().isoformat(timespec="seconds"), "error_occurred": False, "error_message": "", - "parameters": {**bmdefaults, **bmparams}, + "parameters": bmdefaults | bmparams, } try: benchmark.setUp(**bmparams) diff --git a/tests/benchmarks/parametrized.py b/tests/benchmarks/parametrized.py deleted file mode 100644 index a3af24b4..00000000 --- a/tests/benchmarks/parametrized.py +++ /dev/null @@ -1,6 +0,0 @@ -import nnbench - - -@nnbench.parametrize([{"a": 1}, {"a": 2}], tags=("parametrized",)) -def double(a: int) -> int: - return 2 * 2 diff --git a/tests/test_runner.py b/tests/test_runner.py index 41766899..c1aa2cae 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -74,18 +74,15 @@ def duplicate_context_provider() -> dict[str, str]: def test_filter_benchmarks_on_params(testfolder: str) -> None: + @nnbench.benchmark + def prod(a: int, b: int = 1) -> int: + return a * b + r = nnbench.BenchmarkRunner() - results = r.run(testfolder, tags=("parametrized",)) - print(results) - assert len(results.benchmarks) == 2 - assert ( - len( - list( - filter( - lambda bm: bm["parameters"]["a"] == 1, - results.benchmarks, - ) - ) - ) - == 1 - ) + r.benchmarks.append(prod) + # TODO (nicholasjng): This is hacky + rec1 = r.run("", params={"a": 1, "b": 2}) + assert rec1.benchmarks[0]["parameters"] == {"a": 1, "b": 2} + # Assert that the defaults are also present if not overridden. + rec2 = r.run("", params={"a": 1}) + assert rec2.benchmarks[0]["parameters"] == {"a": 1, "b": 1}