diff --git a/Cargo.lock b/Cargo.lock index da7daf4e..05daf3db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8195,6 +8195,7 @@ version = "0.9.6" dependencies = [ "age", "anyhow", + "criterion", "notify", "notify-debouncer-mini", "serde", @@ -8292,6 +8293,7 @@ name = "zeph-memory" version = "0.9.6" dependencies = [ "anyhow", + "criterion", "qdrant-client", "serde_json", "sqlx", @@ -8310,6 +8312,7 @@ version = "0.9.6" dependencies = [ "anyhow", "blake3", + "criterion", "futures", "notify", "notify-debouncer-mini", diff --git a/crates/zeph-core/Cargo.toml b/crates/zeph-core/Cargo.toml index 7bd6455f..9b1b9f1f 100644 --- a/crates/zeph-core/Cargo.toml +++ b/crates/zeph-core/Cargo.toml @@ -32,7 +32,12 @@ zeph-memory.workspace = true zeph-skills = { workspace = true, features = ["qdrant"] } zeph-tools.workspace = true +[[bench]] +name = "context_building" +harness = false + [dev-dependencies] +criterion.workspace = true serial_test.workspace = true tempfile.workspace = true diff --git a/crates/zeph-core/benches/context_building.rs b/crates/zeph-core/benches/context_building.rs new file mode 100644 index 00000000..1d0b460b --- /dev/null +++ b/crates/zeph-core/benches/context_building.rs @@ -0,0 +1,85 @@ +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use std::hint::black_box; +use zeph_memory::estimate_tokens; + +fn generate_messages(count: usize, avg_len: usize) -> Vec { + let base = "This is a simulated message with typical content for an AI conversation. "; + (0..count) + .map(|i| { + let content = base.repeat(avg_len / base.len() + 1); + format!("[user]: message {i} {}", &content[..avg_len]) + }) + .collect() +} + +fn should_compact_check(c: &mut Criterion) { + let mut group = c.benchmark_group("should_compact"); + + for count in [20, 50, 100] { + let messages = generate_messages(count, 200); + group.bench_with_input(BenchmarkId::new("messages", count), &messages, |b, msgs| { + b.iter(|| { + let total: usize = msgs.iter().map(|m| estimate_tokens(m)).sum(); + black_box(total > 4000) + }); + }); + } + + group.finish(); +} + +fn trim_budget_scan(c: &mut Criterion) { + let mut group = c.benchmark_group("trim_budget_scan"); + + for count in [20, 50, 100] { + let messages = generate_messages(count, 200); + let budget = 2000usize; + + group.bench_with_input(BenchmarkId::new("messages", count), &messages, |b, msgs| { + b.iter(|| { + let mut total = 0usize; + let mut keep_from = msgs.len(); + for i in (0..msgs.len()).rev() { + let tokens = estimate_tokens(&msgs[i]); + if total + tokens > budget { + break; + } + total += tokens; + keep_from = i; + } + black_box((keep_from, total)) + }); + }); + } + + group.finish(); +} + +fn history_formatting(c: &mut Criterion) { + let mut group = c.benchmark_group("history_formatting"); + + for count in [10, 30, 50] { + let messages = generate_messages(count, 200); + + group.bench_with_input(BenchmarkId::new("messages", count), &messages, |b, msgs| { + b.iter(|| { + let text: String = msgs + .iter() + .map(|m| m.as_str()) + .collect::>() + .join("\n\n"); + black_box(text) + }); + }); + } + + group.finish(); +} + +criterion_group!( + benches, + should_compact_check, + trim_budget_scan, + history_formatting +); +criterion_main!(benches); diff --git a/crates/zeph-memory/Cargo.toml b/crates/zeph-memory/Cargo.toml index 28b291f7..e8cb8a4b 100644 --- a/crates/zeph-memory/Cargo.toml +++ b/crates/zeph-memory/Cargo.toml @@ -16,8 +16,13 @@ tracing.workspace = true uuid = { workspace = true, features = ["v4"] } zeph-llm.workspace = true +[[bench]] +name = "token_estimation" +harness = false + [dev-dependencies] anyhow.workspace = true +criterion.workspace = true testcontainers.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } tokio-stream.workspace = true diff --git a/crates/zeph-memory/benches/token_estimation.rs b/crates/zeph-memory/benches/token_estimation.rs new file mode 100644 index 00000000..388a81a1 --- /dev/null +++ b/crates/zeph-memory/benches/token_estimation.rs @@ -0,0 +1,67 @@ +use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; +use std::hint::black_box; +use zeph_memory::estimate_tokens; + +fn generate_text(size: usize) -> String { + let paragraph = "The quick brown fox jumps over the lazy dog. \ + This sentence contains various English words and punctuation marks.\n"; + paragraph.repeat(size / paragraph.len() + 1)[..size].to_string() +} + +fn token_estimation(c: &mut Criterion) { + let mut group = c.benchmark_group("estimate_tokens"); + + for size in [1_000, 10_000, 100_000] { + let input = generate_text(size); + group.throughput(Throughput::Bytes(size as u64)); + group.bench_with_input(BenchmarkId::new("ascii", size), &input, |b, input| { + b.iter(|| estimate_tokens(black_box(input))); + }); + } + + group.finish(); +} + +fn token_estimation_unicode(c: &mut Criterion) { + let mut group = c.benchmark_group("estimate_tokens_unicode"); + + let pattern = "Привет мир! 你好世界! こんにちは世界! 🌍🌎🌏 "; + for size in [1_000, 10_000, 100_000] { + let input = pattern.repeat(size / pattern.len() + 1); + let input = &input[..input.floor_char_boundary(size)]; + let input = input.to_string(); + let actual_len = input.len(); + group.throughput(Throughput::Bytes(actual_len as u64)); + group.bench_with_input( + BenchmarkId::new("unicode", actual_len), + &input, + |b, input| { + b.iter(|| estimate_tokens(black_box(input))); + }, + ); + } + + group.finish(); +} + +fn token_estimation_batch(c: &mut Criterion) { + let mut group = c.benchmark_group("estimate_tokens_batch"); + + let messages: Vec = (0..50) + .map(|i| format!("Message {i}: {}", generate_text(200))) + .collect(); + + group.bench_function("50_messages_sum", |b| { + b.iter(|| black_box(messages.iter().map(|m| estimate_tokens(m)).sum::())); + }); + + group.finish(); +} + +criterion_group!( + benches, + token_estimation, + token_estimation_unicode, + token_estimation_batch +); +criterion_main!(benches); diff --git a/crates/zeph-skills/Cargo.toml b/crates/zeph-skills/Cargo.toml index de419e20..39f4a042 100644 --- a/crates/zeph-skills/Cargo.toml +++ b/crates/zeph-skills/Cargo.toml @@ -25,8 +25,13 @@ uuid = { workspace = true, optional = true, features = ["v5"] } zeph-llm.workspace = true zeph-memory = { workspace = true, optional = true } +[[bench]] +name = "matcher" +harness = false + [dev-dependencies] anyhow.workspace = true +criterion.workspace = true tempfile.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread", "time"] } diff --git a/crates/zeph-skills/benches/matcher.rs b/crates/zeph-skills/benches/matcher.rs new file mode 100644 index 00000000..c50201a3 --- /dev/null +++ b/crates/zeph-skills/benches/matcher.rs @@ -0,0 +1,50 @@ +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use std::hint::black_box; +use zeph_skills::matcher::cosine_similarity; + +fn generate_vector(dim: usize, seed: f32) -> Vec { + (0..dim).map(|i| ((i as f32 + seed) * 0.1).sin()).collect() +} + +fn cosine_similarity_bench(c: &mut Criterion) { + let mut group = c.benchmark_group("cosine_similarity"); + + for dim in [128, 384, 768, 1536] { + let a = generate_vector(dim, 1.0); + let b = generate_vector(dim, 2.0); + group.bench_with_input(BenchmarkId::new("dim", dim), &dim, |bench, _| { + bench.iter(|| cosine_similarity(black_box(&a), black_box(&b))); + }); + } + + group.finish(); +} + +fn cosine_ranking(c: &mut Criterion) { + let mut group = c.benchmark_group("cosine_ranking"); + + for count in [10, 50, 100] { + let query = generate_vector(384, 0.0); + let candidates: Vec> = + (0..count).map(|i| generate_vector(384, i as f32)).collect(); + + group.bench_with_input(BenchmarkId::new("candidates", count), &count, |b, _| { + b.iter(|| { + let mut scored: Vec<(usize, f32)> = candidates + .iter() + .enumerate() + .map(|(i, emb)| (i, cosine_similarity(&query, emb))) + .collect(); + scored.sort_unstable_by(|a, b| { + b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal) + }); + black_box(scored) + }); + }); + } + + group.finish(); +} + +criterion_group!(benches, cosine_similarity_bench, cosine_ranking); +criterion_main!(benches); diff --git a/crates/zeph-skills/src/matcher.rs b/crates/zeph-skills/src/matcher.rs index 38600c68..e6b80fac 100644 --- a/crates/zeph-skills/src/matcher.rs +++ b/crates/zeph-skills/src/matcher.rs @@ -142,7 +142,8 @@ impl SkillMatcherBackend { } } -fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { +#[must_use] +pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { if a.len() != b.len() || a.is_empty() { return 0.0; }