68 commits, all by aaronc (Aug 28 to Sep 22, 2025):
dc453ec  refactor(bench): pre-generate data (Aug 28)
bc8dd87  refactoring (Aug 28)
b2c1ae7  refactoring (Aug 28)
a0ca709  update, WIP on memiavl bench (Aug 28)
c81f88b  working memiavl benchmarks (Aug 28)
1d844d4  working iavl2 benchmarks (Aug 28)
b41aa47  support target version properly, but iavl/v2 has bugs it seems... (Aug 28)
f07b136  delete old code (Aug 28)
def75d9  support iavl v1 (Aug 28)
48389c9  adding logging options (Aug 28)
ddf31fc  fix mem tracking (Aug 28)
1deec61  add disk usage and error exit code (Aug 29)
56fcff5  add build info, etc (Aug 29)
e1aa49c  add host info, etc. (Aug 29)
fd138ed  combined runner (Aug 29)
e687f7e  combined runner (Aug 29)
64d1fc1  WIP on runner (Aug 29)
1928f47  update runner (Aug 29)
39bac3e  add more opts (Aug 29)
ac6f83b  add visualization (Aug 29)
7f45ca0  update dashboard (Aug 29)
9057387  WIP on supporting alpha6 (Aug 29)
236bc11  make alpha6 work (Aug 29)
75c58ab  adding sdk store benchmarks (Aug 29)
8b2894f  fix builds (Aug 29)
09d5e70  fix bug (Aug 29)
80f8ba5  fix bugs with dir size, support jsonc plans (Aug 29)
8bc14e1  fix store-v1 setup (Aug 29)
9bfd1eb  fix store-v1 setup (Aug 29)
f77ac96  update visualization (Sep 3)
8c2fc56  move changeset and version to cmd args (Sep 3)
8a4c2e2  add disk io and cpu tracking (Sep 3)
3a191b8  add logging where possible (Sep 3)
7254195  update logging, changeset gen (Sep 3)
12eda6d  fix logging config (Sep 3)
61f9425  fix logging config (Sep 3)
9acd0e7  fix logging config (Sep 3)
9e26e4e  fix zero value issue (Sep 3)
a033460  add run results (Sep 3)
d029e88  add osmo-like logs (Sep 3)
7d520eb  handle incomplete runs (Sep 3)
e5e173d  improve logging and exception handling (Sep 3)
e376ee9  update gen-changesets with scale factor to test different sizes (Sep 4)
b914ff4  capture more info about changesets (Sep 4)
60c179c  add latest benchmarks (Sep 4)
fe30aef  scaling of mixed changeset gen (Sep 5)
d3cb21e  add thread-safe fork variants of memiavl (Sep 5)
0eb1501  adjust change per version params (Sep 5)
d161aa9  write changeset info file each time (Sep 5)
75b8b3e  updating analysis code (Sep 8)
897af8b  updating analysis code (Sep 8)
8e4aa25  updating analysis code (Sep 8)
c7b460c  update temp dir creation (Sep 8)
6c137c7  analyze snapshot timing (Sep 9)
9d1f905  update analysis (Sep 10)
81a268d  improve options checking and profiling (Sep 10)
4907ed5  switch to no-op loggers for v1 (Sep 10)
a3f3def  add heightfilter opt (Sep 11)
cadd86e  WIP on analysis (Sep 11)
413d0d5  Add init log message (Sep 12)
ff31ca5  WIP on analysis, iavl/v1 opts (Sep 15)
9d73340  Merge remote-tracking branch 'origin/aaronc/updates' into aaronc/updates (Sep 15)
aacf83d  WIP on analysis (Sep 15)
f61ec9c  WIP on analysis (Sep 15)
e38425a  WIP on analysis (Sep 15)
67dfa0b  WIP on rendering (Sep 15)
9c9ec49  WIP on rendering (Sep 16)
5a81f00  add run files to git lfs (Sep 22)
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,3 +1,5 @@
.idea/
go.work
go.work.sum
bench/run-*/
/analysis/.ipynb_checkpoints
1 change: 1 addition & 0 deletions .python-version
@@ -0,0 +1 @@
3.13
15 changes: 15 additions & 0 deletions Makefile
@@ -0,0 +1,15 @@
install:
cd bench && go install ./cmd/gen-changesets
cd bench && go install ./cmd/iavl-bench-all
cd iavl-v0 && go install .
cd iavl-v1 && go install .
cd iavl-v2/alpha5 && go install .
cd iavl-v2/alpha6 && go install .
cd memiavl && go install .
cd memiavl/thread-safe && go install .
cd memiavl/query-thread-safe && go install .
cd store-v1/latest && go install .
cd store-v1/iavl-v2 && go install .
cd store-v1/memiavl && go install .

.PHONY: install
Empty file added analysis/__init__.py
Empty file.
4,782 changes: 4,782 additions & 0 deletions analysis/analysis.ipynb

Large diffs are not rendered by default.

105 changes: 105 additions & 0 deletions analysis/analysis.py
@@ -0,0 +1,105 @@
from read_logs import BenchmarkData
import polars as pl
import plotly.graph_objects as go


def total_ops_per_sec(run: BenchmarkData) -> float:
count = run.versions_df['count'].sum()
total_duration = run.versions_df['duration'].sum() / 1_000_000_000 # convert from nanoseconds
return count / total_duration


def max_mem_gb(run: BenchmarkData) -> float:
return run.versions_df['mem_gb'].max()


def max_disk_gb(run: BenchmarkData) -> float:
return run.versions_df['disk_usage_gb'].max()


def summary(dataset: dict[str, BenchmarkData], run_names=None) -> pl.DataFrame:
if run_names is None:
run_names = list(dataset.keys())
summary_data = []
for name in run_names:
run = dataset[name]
summary_data.append({
'name': name,
'ops_per_sec': total_ops_per_sec(run),
'max_mem_gb': max_mem_gb(run),
'max_disk_gb': max_disk_gb(run),
})
return pl.DataFrame(summary_data)


def calculate_batch_ops_per_sec(versions_df, batch_size=100):
"""Calculate ops_per_sec for every batch_size versions by summing counts and durations."""
return (
versions_df
.with_columns(
((pl.col("version") / batch_size).ceil() * batch_size).alias("version_batch")
)
.group_by("version_batch")
.agg([
pl.col("count").sum().alias("total_count"),
pl.col("duration").sum().alias("total_duration")
])
.with_columns(
(pl.col("total_count") / (pl.col("total_duration") / 1_000_000_000)).alias("ops_per_sec")
)
.select(["version_batch", "ops_per_sec"])
.rename({"version_batch": "version"})
.sort("version")
)


def _create_line_plot(dataset, run_names: list[str], y_axis_title: str,
data_getter=None, column_name=None):
"""Generic utility function to create line plots from dataset.

Either provide data_getter (a function that takes a run and returns a dataframe with 'version' and a y column)
or column_name (to directly access run.versions_df[column_name]).
"""
if run_names is None:
run_names = list(dataset.keys())

fig = go.Figure()
for name in run_names:
run = dataset[name]

if data_getter:
df = data_getter(run)
x_data = df['version']
y_data = df.select(pl.exclude('version')).to_series()
else:
x_data = run.versions_df['version']
y_data = run.versions_df[column_name]

fig.add_trace(go.Scatter(
x=x_data,
y=y_data,
mode='lines',
name=name
))

fig.update_layout(
xaxis_title="Version",
yaxis_title=y_axis_title,
hovermode='x unified'
)
return fig


def plot_ops_per_sec(dataset, run_names: list[str] = None, batch_size=100):
def get_batched_ops(run):
return calculate_batch_ops_per_sec(run.versions_df, batch_size)

return _create_line_plot(dataset, run_names, 'Ops/Sec', data_getter=get_batched_ops)


def plot_mem(dataset, run_names: list[str] = None):
return _create_line_plot(dataset, run_names, 'Memory (GB)', column_name='mem_gb')


def plot_disk_usage(dataset, run_names: list[str] = None):
return _create_line_plot(dataset, run_names, 'Disk Usage (GB)', column_name='disk_usage_gb')
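Note (not part of the diff): a minimal usage sketch of these helpers, as they might be called from a notebook. The results directory name is hypothetical, and load_benchmark_dir comes from the read_logs module imported by dashboard.py below.

from read_logs import load_benchmark_dir
from analysis import summary, plot_ops_per_sec, plot_mem, plot_disk_usage

# Hypothetical results directory; runs are keyed by name because summary()
# and the plot helpers expect a dict[str, BenchmarkData].
runs = load_benchmark_dir("bench/run-example")
dataset = {run.name: run for run in runs}

print(summary(dataset))                           # ops/sec, peak memory, peak disk per run
plot_ops_per_sec(dataset, batch_size=500).show()  # throughput, smoothed over 500-version batches
plot_mem(dataset).show()                          # memory (GB) per version
plot_disk_usage(dataset).show()                   # disk usage (GB) per version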
115 changes: 115 additions & 0 deletions analysis/dashboard.py
@@ -0,0 +1,115 @@
import os
from pathlib import Path

import humanfriendly
import polars as pl
import streamlit as st
import pandas
from read_logs import load_benchmark_dir

# get benchmark dir from env var BENCHMARK_RESULTS or panic
benchmark_dir = os.getenv('BENCHMARK_RESULTS')
if not benchmark_dir:
raise ValueError('BENCHMARK_RESULTS environment variable not set')

all_data = load_benchmark_dir(benchmark_dir)
all_names = [d.name for d in all_data]

st.title('Benchmark Results Visualization')

summaries = [d.summary for d in all_data if d.summary is not None]
if len(summaries) != 0:
# Show table and bar charts of all summary data
st.header('Summary Data')

summary_df = pandas.DataFrame(summaries)
summary_df.index = [d.name for d in all_data if d.summary is not None]
tab1, tab2, tab3, tab4 = st.tabs(['Summary', 'Ops/sec', 'Max Mem (GB)', 'Max Disk (GB)'])

with tab1:
st.dataframe(summary_df)

with tab2:
st.bar_chart(summary_df, y='ops_per_sec', stack=False)

with tab3:
st.bar_chart(summary_df, y='max_mem_gb', stack=False)

with tab4:
st.bar_chart(summary_df, y='max_disk_gb', stack=False)

# Show line charts for ops_per_sec, mem_sys, disk_usage over versions for each benchmark
st.header('Performance Over Time')

names = st.segmented_control("Benchmark Runs", all_names, selection_mode="multi", default=all_names)

if len(names) == 0:
st.warning('Please select at least one benchmark run to display')
st.stop()

data = [d for d in all_data if d.name in names]

# For now truncate all data to the shortest length
min_versions = min(len(d.versions) for d in data)
for d in data:
d.versions_df = d.versions_df.head(min_versions)

tab1, tab2, tab3 = st.tabs(['Ops/sec', 'Memory', 'Disk Usage'])

with tab1:
ops_per_sec_df = pl.DataFrame({d.name: d.versions_df.select('ops_per_sec').to_series() for d in data})
st.line_chart(ops_per_sec_df, x_label='version', y_label='ops/sec')

with tab2:
mem_df = pl.DataFrame({d.name: d.versions_df.select('mem_gb').to_series() for d in data})
st.line_chart(mem_df, x_label='version', y_label='mem (GB)')

with tab3:
disk_df = pl.DataFrame({d.name: d.versions_df.select('disk_usage_gb').to_series() for d in data})
st.line_chart(disk_df, x_label='version', y_label='disk (GB)')

# with tab4:
# disk_io_df = pl.DataFrame({d.name: d.versions_df.select('disk_io').to_series() for d in data})
# st.line_chart(disk_io_df, x_label='version')

st.text(f'Showing data from {len(all_data)} benchmark logs in {Path(benchmark_dir).absolute()}')

init_data0 = all_data[0].init_data
changeset_dir0 = init_data0.get('changeset_dir') if init_data0 else None
changeset_info0 = init_data0.get('changeset_info') if init_data0 else None
st.markdown(f'Changeset Dir: `{changeset_dir0}`')
st.markdown(f'Changeset Versions: `{changeset_info0.get("versions")}`')
for store in changeset_info0.get('store_params'):
st.markdown(f'Store: `{store["store_key"]}`')
st.markdown(
f'* Initial Size=`{humanfriendly.format_number(store["initial_size"])}` -> Final Size=`{humanfriendly.format_number(store["final_size"])}` (over `{store["versions"]}` versions)')
st.markdown(
f'* K mean=`{store["key_mean"]}`, stddev=`{store["key_std_dev"]}`, V mean=`{store["value_mean"]}`, stddev=`{store["value_std_dev"]}`')
st.markdown(f'* Change per version=`{store["change_per_version"]}`, delete fraction=`{store["delete_fraction"]}`')

for d in all_data:
st.markdown(f'## {d.name}')
st.markdown(f'`{len(d.versions)}` Versions Successfully Committed')
if d.init_data:
if 'changeset_dir' in d.init_data:
changeset_dir = d.init_data['changeset_dir']
if changeset_dir != changeset_dir0:
raise ValueError('Benchmark runs have different changeset dirs')
if 'start_version' in d.init_data:
start_version = d.init_data['start_version']
if start_version != 0:
st.markdown(f'Start Version: `{start_version}`')
if 'target_version' in d.init_data:
target_version = d.init_data['target_version']
if target_version != 0:
st.markdown(f'Target Version: `{target_version}`')
if 'db_options' in d.init_data:
db_options = d.init_data['db_options']
st.markdown(f'DB Options:')
st.json(db_options, expanded=False)
if d.memiavl_snapshots is not None:
with st.expander('Memiavl Snapshot Details', expanded=False):
st.dataframe(d.memiavl_snapshots)
st.line_chart(d.memiavl_snapshots.select("version", pl.col("snapshot_duration").dt.total_minutes().alias("snapshot_minutes")),
x="version",
y="snapshot_minutes")
20 changes: 20 additions & 0 deletions analysis/memiavl_snapshots.py
@@ -0,0 +1,20 @@
from datetime import datetime


def capture_memiavl_snapshot_log(row, snapshots):
msg = row.get("msg")
time = datetime.fromisoformat(row.get("time"))
version = row.get("version")
match msg:
case "start rewriting snapshot":
snapshots += [{"version": version, "start_time": time}]
case "finished rewriting snapshot":
snapshots[-1]["end_time"] = time
snapshots[-1]["snapshot_duration"] = time - snapshots[-1]["start_time"]
case "finished best-effort WAL catchup":
snapshots[-1]["best_effort_wal_time"] = time
snapshots[-1]["best_effort_wal_duration"] = time - snapshots[-1]["end_time"]
case "switched to new snapshot":
snapshots[-1]["switch_time"] = time
snapshots[-1]["wal_sync_duration"] = time - snapshots[-1]["best_effort_wal_time"]
snapshots[-1]["switch_version"] = version