From a130498451ce7ae7d8525abb6168cb6e5f82204d Mon Sep 17 00:00:00 2001
From: Max Mynter
Date: Fri, 8 Mar 2024 17:47:22 +0100
Subject: [PATCH 1/5] Add Parameters to Benchmarks

Add the parameters to a 'BenchmarkRecord' by appending them to the results.
Omit the type information from the `Variable` object and only retain
variable name and value.
---
 src/nnbench/runner.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/nnbench/runner.py b/src/nnbench/runner.py
index 1dacccb7..4a0a44df 100644
--- a/src/nnbench/runner.py
+++ b/src/nnbench/runner.py
@@ -89,7 +89,9 @@ def _canonicalize(t: type) -> type:
             )

         if typ == empty:
-            logger.debug(f"parameter {name!r} untyped in benchmark {bm.fn.__name__}().")
+            logger.debug(
+                f"parameter {name!r} untyped in benchmark {bm.fn.__name__}()."
+            )

         if name in allvars:
             currvar = allvars[name]
@@ -134,7 +136,9 @@ def clear(self) -> None:
         """Clear all registered benchmarks."""
         self.benchmarks.clear()

-    def collect(self, path_or_module: str | os.PathLike[str], tags: tuple[str, ...] = ()) -> None:
+    def collect(
+        self, path_or_module: str | os.PathLike[str], tags: tuple[str, ...] = ()
+    ) -> None:
         # TODO: functools.cache this guy
         """
         Discover benchmarks in a module and memoize them for later use.
@@ -226,7 +230,9 @@ def run(

         # if we still have no benchmarks after collection, warn and return an empty record.
         if not self.benchmarks:
-            warnings.warn(f"No benchmarks found in path/module {str(path_or_module)!r}.")
+            warnings.warn(
+                f"No benchmarks found in path/module {str(path_or_module)!r}."
+            )
             return BenchmarkRecord(context=Context(), benchmarks=[])

         params = params or {}
@@ -254,7 +260,10 @@ def run(

         results: list[dict[str, Any]] = []
         for benchmark in self.benchmarks:
-            bmparams = {k: v for k, v in dparams.items() if k in benchmark.interface.names}
+            bmparams = {
+                k: v for k, v in dparams.items() if k in benchmark.interface.names
+            }
+            bmdefaults = {k: v for (k, t, v) in benchmark.interface.variables}
             # TODO: Wrap this into an execution context
             res: dict[str, Any] = {
                 "name": benchmark.name,
@@ -263,6 +272,7 @@ def run(
                 "date": datetime.now().isoformat(timespec="seconds"),
                 "error_occurred": False,
                 "error_message": "",
+                "parameters": {**bmdefaults, **bmparams},
             }
             try:
                 benchmark.setUp(**bmparams)

From bf72378ff1b9ee607a14256861d5eb878be705c3 Mon Sep 17 00:00:00 2001
From: Max Mynter
Date: Fri, 8 Mar 2024 18:10:19 +0100
Subject: [PATCH 2/5] Update Readme to include parameter summary

---
 README.md             | 10 +++++-----
 src/nnbench/runner.py | 16 ++++------------
 2 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index 3b39b816..afcf421d 100644
--- a/README.md
+++ b/README.md
@@ -49,11 +49,11 @@ record = runner.run("__main__", params={"a": 2, "b": 10})
 rep = nnbench.BenchmarkReporter()
 rep.display(record)  # ...and print the results to the terminal.
-# results in a table like the following:
-# name     function    date                   value    time_ns
-# -------  ----------  -------------------  -------  ---------
-# product  product     2024-03-07T10:14:21       20       1000
-# power    power       2024-03-07T10:14:21     1024        750
+# results in a table like the following:
+# name     function    date                 parameters           value    time_ns
+# -------  ----------  -------------------  -----------------  -------  ---------
+# product  product     2024-03-08T18:03:48  {'a': 2, 'b': 10}       20       1000
+# power    power       2024-03-08T18:03:48  {'a': 2, 'b': 10}     1024        750
 ```

 For a more realistic example of how to evaluate a trained model with a benchmark suite, check the [Quickstart](https://aai-institute.github.io/nnbench/latest/quickstart/).
diff --git a/src/nnbench/runner.py b/src/nnbench/runner.py
index 4a0a44df..793b60c2 100644
--- a/src/nnbench/runner.py
+++ b/src/nnbench/runner.py
@@ -89,9 +89,7 @@ def _canonicalize(t: type) -> type:
             )

         if typ == empty:
-            logger.debug(
-                f"parameter {name!r} untyped in benchmark {bm.fn.__name__}()."
-            )
+            logger.debug(f"parameter {name!r} untyped in benchmark {bm.fn.__name__}().")

         if name in allvars:
             currvar = allvars[name]
@@ -136,9 +134,7 @@ def clear(self) -> None:
         """Clear all registered benchmarks."""
         self.benchmarks.clear()

-    def collect(
-        self, path_or_module: str | os.PathLike[str], tags: tuple[str, ...] = ()
-    ) -> None:
+    def collect(self, path_or_module: str | os.PathLike[str], tags: tuple[str, ...] = ()) -> None:
         # TODO: functools.cache this guy
         """
         Discover benchmarks in a module and memoize them for later use.
@@ -230,9 +226,7 @@ def run(

         # if we still have no benchmarks after collection, warn and return an empty record.
         if not self.benchmarks:
-            warnings.warn(
-                f"No benchmarks found in path/module {str(path_or_module)!r}."
-            )
+            warnings.warn(f"No benchmarks found in path/module {str(path_or_module)!r}.")
             return BenchmarkRecord(context=Context(), benchmarks=[])

         params = params or {}
@@ -260,9 +254,7 @@ def run(
         results: list[dict[str, Any]] = []
         for benchmark in self.benchmarks:
-            bmparams = {
-                k: v for k, v in dparams.items() if k in benchmark.interface.names
-            }
+            bmparams = {k: v for k, v in dparams.items() if k in benchmark.interface.names}
             bmdefaults = {k: v for (k, t, v) in benchmark.interface.variables}
             # TODO: Wrap this into an execution context
             res: dict[str, Any] = {
                 "name": benchmark.name,

From 70dde070944ae8b78208e1fbd625d50fa0a5278a Mon Sep 17 00:00:00 2001
From: Max Mynter
Date: Fri, 8 Mar 2024 18:33:42 +0100
Subject: [PATCH 3/5] Add test for filtering via parameters

---
 tests/benchmarks/parametrized.py |  6 ++++++
 tests/test_runner.py             | 18 ++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 tests/benchmarks/parametrized.py

diff --git a/tests/benchmarks/parametrized.py b/tests/benchmarks/parametrized.py
new file mode 100644
index 00000000..a3af24b4
--- /dev/null
+++ b/tests/benchmarks/parametrized.py
@@ -0,0 +1,6 @@
+import nnbench
+
+
+@nnbench.parametrize([{"a": 1}, {"a": 2}], tags=("parametrized",))
+def double(a: int) -> int:
+    return 2 * a
diff --git a/tests/test_runner.py b/tests/test_runner.py
index 2fa4a38d..41766899 100644
--- a/tests/test_runner.py
+++ b/tests/test_runner.py
@@ -71,3 +71,21 @@ def duplicate_context_provider() -> dict[str, str]:
         params={"x": 1, "y": 1},
         context=context_providers,
     )
+
+
+def test_filter_benchmarks_on_params(testfolder: str) -> None:
+    r = nnbench.BenchmarkRunner()
+    results = r.run(testfolder, tags=("parametrized",))
+    print(results)
+    assert len(results.benchmarks) == 2
+    assert (
+        len(
+            list(
+                filter(
+                    lambda bm: bm["parameters"]["a"] == 1,
bm["parameters"]["a"] == 1, + results.benchmarks, + ) + ) + ) + == 1 + ) From 525ddea64ead8065dfe6b12553c924f8a61f1395 Mon Sep 17 00:00:00 2001 From: Max Mynter <32773644+maxmynter@users.noreply.github.com> Date: Fri, 8 Mar 2024 20:11:11 +0100 Subject: [PATCH 4/5] Update src/nnbench/runner.py Co-authored-by: Nicholas Junge --- src/nnbench/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nnbench/runner.py b/src/nnbench/runner.py index 793b60c2..c7bea5d5 100644 --- a/src/nnbench/runner.py +++ b/src/nnbench/runner.py @@ -255,7 +255,7 @@ def run( results: list[dict[str, Any]] = [] for benchmark in self.benchmarks: bmparams = {k: v for k, v in dparams.items() if k in benchmark.interface.names} - bmdefaults = {k: v for (k, t, v) in benchmark.interface.variables} + bmdefaults = {k: v for (k, _, v) in benchmark.interface.variables} # TODO: Wrap this into an execution context res: dict[str, Any] = { "name": benchmark.name, From 0c0a107fd462759660b3a1377dfdae02881400c0 Mon Sep 17 00:00:00 2001 From: Nicholas Junge Date: Sun, 10 Mar 2024 20:41:06 +0100 Subject: [PATCH 5/5] Inline benchmark into test, use update syntax on dict What's left is to decide what to do with parameter types that aren't as easily ingestible into a sink as builtin datatypes. --- src/nnbench/runner.py | 2 +- tests/benchmarks/parametrized.py | 6 ------ tests/test_runner.py | 25 +++++++++++-------------- 3 files changed, 12 insertions(+), 21 deletions(-) delete mode 100644 tests/benchmarks/parametrized.py diff --git a/src/nnbench/runner.py b/src/nnbench/runner.py index c7bea5d5..d3417b58 100644 --- a/src/nnbench/runner.py +++ b/src/nnbench/runner.py @@ -264,7 +264,7 @@ def run( "date": datetime.now().isoformat(timespec="seconds"), "error_occurred": False, "error_message": "", - "parameters": {**bmdefaults, **bmparams}, + "parameters": bmdefaults | bmparams, } try: benchmark.setUp(**bmparams) diff --git a/tests/benchmarks/parametrized.py b/tests/benchmarks/parametrized.py deleted file mode 100644 index a3af24b4..00000000 --- a/tests/benchmarks/parametrized.py +++ /dev/null @@ -1,6 +0,0 @@ -import nnbench - - -@nnbench.parametrize([{"a": 1}, {"a": 2}], tags=("parametrized",)) -def double(a: int) -> int: - return 2 * 2 diff --git a/tests/test_runner.py b/tests/test_runner.py index 41766899..c1aa2cae 100644 --- a/tests/test_runner.py +++ b/tests/test_runner.py @@ -74,18 +74,15 @@ def duplicate_context_provider() -> dict[str, str]: def test_filter_benchmarks_on_params(testfolder: str) -> None: + @nnbench.benchmark + def prod(a: int, b: int = 1) -> int: + return a * b + r = nnbench.BenchmarkRunner() - results = r.run(testfolder, tags=("parametrized",)) - print(results) - assert len(results.benchmarks) == 2 - assert ( - len( - list( - filter( - lambda bm: bm["parameters"]["a"] == 1, - results.benchmarks, - ) - ) - ) - == 1 - ) + r.benchmarks.append(prod) + # TODO (nicholasjng): This is hacky + rec1 = r.run("", params={"a": 1, "b": 2}) + assert rec1.benchmarks[0]["parameters"] == {"a": 1, "b": 2} + # Assert that the defaults are also present if not overridden. + rec2 = r.run("", params={"a": 1}) + assert rec2.benchmarks[0]["parameters"] == {"a": 1, "b": 1}