Skip to content

Commit

Permalink
feature-benchmark: Record min/max/mean/variance of wallclock
Browse files Browse the repository at this point in the history
These statistics are not meant to be used as regression markers; they let us
continuously monitor performance and find interesting trends.
  • Loading branch information
def- committed Aug 26, 2024
1 parent a95d82e commit 1328e8b
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 19 deletions.
28 changes: 25 additions & 3 deletions misc/python/materialize/feature_benchmark/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
# by the Apache License, Version 2.0.

from collections.abc import Iterable
from typing import Any
from statistics import mean, variance
from typing import Generic, TypeVar

from materialize import ui
from materialize.feature_benchmark.aggregation import Aggregation
Expand Down Expand Up @@ -232,6 +233,25 @@ def _collect_memory_measurement(
aggregation.append_measurement(memory_measurement)


T = TypeVar("T", bound=int | float)


class ReportMeasurement(Generic[T]):
result: T
min: T
max: T
mean: T
variance: float

def __init__(self, points: list[T]):
self.result = points[0]
if self.result is not None:
self.min = min(points)
self.max = max(points)
self.mean = mean(points)
self.variance = variance(points)


class Report:
def __init__(self, cycle_number: int) -> None:
self.cycle_number = cycle_number
Expand Down Expand Up @@ -270,12 +290,14 @@ def as_string(self, use_colors: bool, limit_to_scenario: str | None = None) -> s
def __str__(self) -> str:
return self.as_string(use_colors=False)

def measurements_of_this(self, scenario_name: str) -> dict[MeasurementType, Any]:
def measurements_of_this(
self, scenario_name: str
) -> dict[MeasurementType, ReportMeasurement]:
result = dict()

for comparison in self._comparisons:
if comparison.name == scenario_name:
result[comparison.type] = comparison.this()
result[comparison.type] = ReportMeasurement(comparison.points_this())

return result

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# Consider increasing the #FEATURE_BENCHMARK_FRAMEWORK_VERSION if changes are expected to impact results!
SHA256_OF_FRAMEWORK: dict[str, str] = {}
SHA256_OF_FRAMEWORK["*"] = (
"9e5ed3ae21972101c8cef1172ffaaab73051c192fd4ddcdc772b74eb96c1e972"
"3833f4d8f9fd24a4f14af873415c4f7b85f28b78a042906c4fb3bfccb1d47e82"
)

# Consider increasing the scenario's class #version() if changes are expected to impact results!
Expand Down
3 changes: 3 additions & 0 deletions misc/python/materialize/feature_benchmark/comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def append_point(
def this(self) -> T:
return self._points[0]

def points_this(self) -> list[T]:
    """Return the list of points held by this instance.

    The returned object is ``self._points`` itself, not a copy.
    """
    held_points = self._points
    return held_points

def this_as_str(self) -> str:
if self.this() is None:
return " None"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from materialize import buildkite
from materialize.buildkite import BuildkiteEnvVar
from materialize.feature_benchmark.benchmark import ReportMeasurement
from materialize.test_analytics.data.base_data_storage import BaseDataStorage


Expand All @@ -20,10 +21,10 @@ class FeatureBenchmarkResultEntry:
scenario_version: str
cycle: int
scale: str
wallclock: float | None
messages: int | None
memory_mz: float | None
memory_clusterd: float | None
wallclock: ReportMeasurement[float] | None
messages: ReportMeasurement[int] | None
memory_mz: ReportMeasurement[float] | None
memory_clusterd: ReportMeasurement[float] | None


class FeatureBenchmarkResultStorage(BaseDataStorage):
Expand Down Expand Up @@ -53,7 +54,11 @@ def add_result(
wallclock,
messages,
memory_mz,
memory_clusterd
memory_clusterd,
wallclock_min,
wallclock_max,
wallclock_mean,
wallclock_variance
)
SELECT
'{job_id}',
Expand All @@ -63,10 +68,14 @@ def add_result(
'{result_entry.scenario_version}',
{result_entry.cycle},
'{result_entry.scale}',
{result_entry.wallclock or 'NULL::DOUBLE'},
{result_entry.messages or 'NULL::INT'},
{result_entry.memory_mz or 'NULL::DOUBLE'},
{result_entry.memory_clusterd or 'NULL::DOUBLE'}
{result_entry.wallclock.result if result_entry.wallclock else 'NULL::DOUBLE'},
{result_entry.messages.result if result_entry.messages else 'NULL::INT'},
{result_entry.memory_mz.result if result_entry.memory_mz else 'NULL::DOUBLE'},
{result_entry.memory_clusterd.result if result_entry.memory_clusterd else 'NULL::DOUBLE'},
{result_entry.wallclock.min if result_entry.wallclock else 'NULL::DOUBLE'},
{result_entry.wallclock.max if result_entry.wallclock else 'NULL::DOUBLE'},
{result_entry.wallclock.mean if result_entry.wallclock else 'NULL::DOUBLE'},
{result_entry.wallclock.variance if result_entry.wallclock else 'NULL::DOUBLE'}
;
"""
)
Expand Down Expand Up @@ -97,15 +106,23 @@ def add_discarded_entries(
messages,
memory_mz,
memory_clusterd,
wallclock_min,
wallclock_max,
wallclock_mean,
wallclock_variance
)
SELECT
'{job_id}',
'{discarded_entry.scenario_name}',
{discarded_entry.cycle},
{discarded_entry.wallclock or 'NULL::DOUBLE'},
{discarded_entry.messages or 'NULL::INT'},
{discarded_entry.memory_mz or 'NULL::DOUBLE'},
{discarded_entry.memory_clusterd or 'NULL::DOUBLE'}
{discarded_entry.wallclock.result if discarded_entry.wallclock else 'NULL::DOUBLE'},
{discarded_entry.messages.result if discarded_entry.messages else 'NULL::INT'},
{discarded_entry.memory_mz.result if discarded_entry.memory_mz else 'NULL::DOUBLE'},
{discarded_entry.memory_clusterd.result if discarded_entry.memory_clusterd else 'NULL::DOUBLE'},
{discarded_entry.wallclock.min if discarded_entry.wallclock else 'NULL::DOUBLE'},
{discarded_entry.wallclock.max if discarded_entry.wallclock else 'NULL::DOUBLE'},
{discarded_entry.wallclock.mean if discarded_entry.wallclock else 'NULL::DOUBLE'},
{discarded_entry.wallclock.variance if discarded_entry.wallclock else 'NULL::DOUBLE'}
;
"""
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@ CREATE TABLE feature_benchmark_result (
wallclock DOUBLE,
messages INT,
memory_mz DOUBLE,
memory_clusterd DOUBLE
memory_clusterd DOUBLE,
wallclock_min DOUBLE,
wallclock_max DOUBLE,
wallclock_mean DOUBLE,
wallclock_variance DOUBLE
);

-- This table holds results of runs that were discarded.
Expand All @@ -31,7 +35,11 @@ CREATE TABLE feature_benchmark_discarded_result (
wallclock DOUBLE,
messages INT,
memory_mz DOUBLE,
memory_clusterd DOUBLE
memory_clusterd DOUBLE,
wallclock_min DOUBLE,
wallclock_max DOUBLE,
wallclock_mean DOUBLE,
wallclock_variance DOUBLE
);

GRANT SELECT, INSERT, UPDATE ON TABLE feature_benchmark_result TO "hetzner-ci";
Expand Down

0 comments on commit 1328e8b

Please sign in to comment.