feature-benchmark: Record min/max/mean/variance of wallclock

These statistics are not meant to be used as regression markers; they let us continuously monitor performance and spot interesting trends.
MaterializeInc · Aug 26, 2024 · 1328e8b · 1328e8b
1 parent a95d82e
commit 1328e8b
Show file tree

Hide file tree

Showing 5 changed files with 69 additions and 19 deletions.
diff --git a/misc/python/materialize/feature_benchmark/benchmark.py b/misc/python/materialize/feature_benchmark/benchmark.py
@@ -8,7 +8,8 @@
 # by the Apache License, Version 2.0.
 
 from collections.abc import Iterable
-from typing import Any
+from statistics import mean, variance
+from typing import Generic, TypeVar
 
 from materialize import ui
 from materialize.feature_benchmark.aggregation import Aggregation
@@ -232,6 +233,25 @@ def _collect_memory_measurement(
                 aggregation.append_measurement(memory_measurement)
 
 
+T = TypeVar("T", bound=int | float)
+
+
+class ReportMeasurement(Generic[T]):
+    """Summary statistics over the recorded points of one measurement.
+
+    ``result`` is the first point. ``min``, ``max``, ``mean`` and ``variance``
+    are computed over all points that are not ``None``. Every attribute is
+    always set; a statistic that cannot be computed is ``None``, so consumers
+    must map ``None`` to their own "missing" representation.
+    """
+
+    result: T | None
+    min: T | None
+    max: T | None
+    # statistics.mean() may return a float even when all points are ints.
+    mean: float | None
+    variance: float | None
+
+    def __init__(self, points: list[T | None]):
+        """Compute the statistics for ``points``.
+
+        Args:
+            points: Recorded values; the first one is the primary result.
+                May be empty or contain ``None`` entries.
+        """
+        self.result = points[0] if points else None
+        self.min = self.max = self.mean = self.variance = None
+        if self.result is None:
+            # As before, no statistics are reported without a primary result.
+            return
+
+        set_points = [point for point in points if point is not None]
+        self.min = min(set_points)
+        self.max = max(set_points)
+        self.mean = mean(set_points)
+        # statistics.variance() raises StatisticsError for fewer than two points.
+        if len(set_points) > 1:
+            self.variance = variance(set_points)
+
+
 class Report:
     def __init__(self, cycle_number: int) -> None:
         self.cycle_number = cycle_number
@@ -270,12 +290,14 @@ def as_string(self, use_colors: bool, limit_to_scenario: str | None = None) -> s
     def __str__(self) -> str:
         return self.as_string(use_colors=False)
 
-    def measurements_of_this(self, scenario_name: str) -> dict[MeasurementType, Any]:
+    def measurements_of_this(
+        self, scenario_name: str
+    ) -> dict[MeasurementType, ReportMeasurement]:
+        """Return a ReportMeasurement per measurement type for one scenario.
+
+        Each comparison whose name equals ``scenario_name`` contributes one
+        entry, keyed by the comparison's type and built from all of its
+        ``points_this()`` values. If several matching comparisons share a
+        type, the last one wins. The dict is empty when nothing matches.
+        """
         result = dict()
 
         for comparison in self._comparisons:
             if comparison.name == scenario_name:
-                result[comparison.type] = comparison.this()
+                result[comparison.type] = ReportMeasurement(comparison.points_this())
 
         return result
 

diff --git a/misc/python/materialize/feature_benchmark/benchmark_versioning.py b/misc/python/materialize/feature_benchmark/benchmark_versioning.py
@@ -19,7 +19,7 @@
 # Consider increasing the #FEATURE_BENCHMARK_FRAMEWORK_VERSION if changes are expected to impact results!
 SHA256_OF_FRAMEWORK: dict[str, str] = {}
 SHA256_OF_FRAMEWORK["*"] = (
-    "9e5ed3ae21972101c8cef1172ffaaab73051c192fd4ddcdc772b74eb96c1e972"
+    "3833f4d8f9fd24a4f14af873415c4f7b85f28b78a042906c4fb3bfccb1d47e82"
 )
 
 # Consider increasing the scenario's class #version() if changes are expected to impact results!

diff --git a/misc/python/materialize/feature_benchmark/comparator.py b/misc/python/materialize/feature_benchmark/comparator.py
@@ -46,6 +46,9 @@ def append_point(
     def this(self) -> T:
         return self._points[0]
 
+    def points_this(self) -> list[T]:
+        """Return all recorded points; ``this()`` only returns the first one.
+
+        The internal list itself is returned, not a copy.
+        """
+        return self._points
+
     def this_as_str(self) -> str:
         if self.this() is None:
             return "           None"

diff --git a/...hon/materialize/test_analytics/data/feature_benchmark/feature_benchmark_result_storage.py b/...hon/materialize/test_analytics/data/feature_benchmark/feature_benchmark_result_storage.py
@@ -10,6 +10,7 @@
 
 from materialize import buildkite
 from materialize.buildkite import BuildkiteEnvVar
+from materialize.feature_benchmark.benchmark import ReportMeasurement
 from materialize.test_analytics.data.base_data_storage import BaseDataStorage
 
 
@@ -20,10 +21,10 @@ class FeatureBenchmarkResultEntry:
     scenario_version: str
     cycle: int
     scale: str
-    wallclock: float | None
-    messages: int | None
-    memory_mz: float | None
-    memory_clusterd: float | None
+    wallclock: ReportMeasurement[float] | None
+    messages: ReportMeasurement[int] | None
+    memory_mz: ReportMeasurement[float] | None
+    memory_clusterd: ReportMeasurement[float] | None
 
 
 class FeatureBenchmarkResultStorage(BaseDataStorage):
@@ -53,7 +54,11 @@ def add_result(
                     wallclock,
                     messages,
                     memory_mz,
-                    memory_clusterd
+                    memory_clusterd,
+                    wallclock_min,
+                    wallclock_max,
+                    wallclock_mean,
+                    wallclock_variance
                 )
                 SELECT
                     '{job_id}',
@@ -63,10 +68,14 @@ def add_result(
                     '{result_entry.scenario_version}',
                     {result_entry.cycle},
                     '{result_entry.scale}',
-                    {result_entry.wallclock or 'NULL::DOUBLE'},
-                    {result_entry.messages or 'NULL::INT'},
-                    {result_entry.memory_mz or 'NULL::DOUBLE'},
-                    {result_entry.memory_clusterd or 'NULL::DOUBLE'}
+                    {result_entry.wallclock.result if result_entry.wallclock else 'NULL::DOUBLE'},
+                    {result_entry.messages.result if result_entry.messages else 'NULL::INT'},
+                    {result_entry.memory_mz.result if result_entry.memory_mz else 'NULL::DOUBLE'},
+                    {result_entry.memory_clusterd.result if result_entry.memory_clusterd else 'NULL::DOUBLE'},
+                    {result_entry.wallclock.min if result_entry.wallclock else 'NULL::DOUBLE'},
+                    {result_entry.wallclock.max if result_entry.wallclock else 'NULL::DOUBLE'},
+                    {result_entry.wallclock.mean if result_entry.wallclock else 'NULL::DOUBLE'},
+                    {result_entry.wallclock.variance if result_entry.wallclock else 'NULL::DOUBLE'}
                 ;
                 """
             )
@@ -97,15 +106,23 @@ def add_discarded_entries(
                     messages,
                     memory_mz,
                     memory_clusterd
+                    wallclock_min,
+                    wallclock_max,
+                    wallclock_mean,
+                    wallclock_variance
                 )
                 SELECT
                     '{job_id}',
                     '{discarded_entry.scenario_name}',
                     {discarded_entry.cycle},
-                    {discarded_entry.wallclock or 'NULL::DOUBLE'},
-                    {discarded_entry.messages or 'NULL::INT'},
-                    {discarded_entry.memory_mz or 'NULL::DOUBLE'},
-                    {discarded_entry.memory_clusterd or 'NULL::DOUBLE'}
+                    {discarded_entry.wallclock.result if discarded_entry.wallclock else 'NULL::DOUBLE'},
+                    {discarded_entry.messages.result if discarded_entry.messages else 'NULL::INT'},
+                    {discarded_entry.memory_mz.result if discarded_entry.memory_mz  else 'NULL::DOUBLE'},
+                    {discarded_entry.memory_clusterd.result if discarded_entry.memory_clusterd else 'NULL::DOUBLE'},
+                    {discarded_entry.wallclock.min if discarded_entry.wallclock else 'NULL::DOUBLE'},
+                    {discarded_entry.wallclock.max if discarded_entry.wallclock else 'NULL::DOUBLE'},
+                    {discarded_entry.wallclock.mean if discarded_entry.wallclock else 'NULL::DOUBLE'},
+                    {discarded_entry.wallclock.variance if discarded_entry.wallclock else 'NULL::DOUBLE'}
                 ;
                 """
             )

diff --git a/misc/python/materialize/test_analytics/setup/tables/10-feature-benchmark.sql b/misc/python/materialize/test_analytics/setup/tables/10-feature-benchmark.sql
@@ -20,7 +20,11 @@ CREATE TABLE feature_benchmark_result (
    wallclock DOUBLE,
    messages INT,
    memory_mz DOUBLE,
-   memory_clusterd DOUBLE
+   memory_clusterd DOUBLE,
+   wallclock_min DOUBLE,
+   wallclock_max DOUBLE,
+   wallclock_mean DOUBLE,
+   wallclock_variance DOUBLE
 );
 
 -- This table holds results of runs that were discarded.
@@ -31,7 +35,11 @@ CREATE TABLE feature_benchmark_discarded_result (
    wallclock DOUBLE,
    messages INT,
    memory_mz DOUBLE,
-   memory_clusterd DOUBLE
+   memory_clusterd DOUBLE,
+   wallclock_min DOUBLE,
+   wallclock_max DOUBLE,
+   wallclock_mean DOUBLE,
+   wallclock_variance DOUBLE
 );
 
 GRANT SELECT, INSERT, UPDATE ON TABLE feature_benchmark_result TO "hetzner-ci";