diff --git a/Cargo.lock b/Cargo.lock
index da7daf4e..05daf3db 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8195,6 +8195,7 @@ version = "0.9.6"
 dependencies = [
  "age",
  "anyhow",
+ "criterion",
  "notify",
  "notify-debouncer-mini",
  "serde",
@@ -8292,6 +8293,7 @@ name = "zeph-memory"
 version = "0.9.6"
 dependencies = [
  "anyhow",
+ "criterion",
  "qdrant-client",
  "serde_json",
  "sqlx",
@@ -8310,6 +8312,7 @@ version = "0.9.6"
 dependencies = [
  "anyhow",
  "blake3",
+ "criterion",
  "futures",
  "notify",
  "notify-debouncer-mini",
diff --git a/crates/zeph-core/Cargo.toml b/crates/zeph-core/Cargo.toml
index 7bd6455f..9b1b9f1f 100644
--- a/crates/zeph-core/Cargo.toml
+++ b/crates/zeph-core/Cargo.toml
@@ -32,7 +32,12 @@ zeph-memory.workspace = true
 zeph-skills = { workspace = true, features = ["qdrant"] }
 zeph-tools.workspace = true
 
+[[bench]]
+name = "context_building"
+harness = false
+
 [dev-dependencies]
+criterion.workspace = true
 serial_test.workspace = true
 tempfile.workspace = true
 
diff --git a/crates/zeph-core/benches/context_building.rs b/crates/zeph-core/benches/context_building.rs
new file mode 100644
index 00000000..1d0b460b
--- /dev/null
+++ b/crates/zeph-core/benches/context_building.rs
@@ -0,0 +1,85 @@
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+use std::hint::black_box;
+use zeph_memory::estimate_tokens;
+
+/// Builds `count` synthetic messages, each ending in the first `avg_len` bytes of filler text.
+fn generate_messages(count: usize, avg_len: usize) -> Vec<String> {
+    let base = "This is a simulated message with typical content for an AI conversation. ";
+    // The filler is the same for every message, so repeat and slice it once, not per message.
+    // `base` is ASCII-only, so cutting at a byte index cannot land inside a character.
+    let filler = base.repeat(avg_len / base.len() + 1);
+    let body = &filler[..avg_len];
+    (0..count).map(|i| format!("[user]: message {i} {body}")).collect()
+}
+
+/// Benchmarks totalling `estimate_tokens` over 20/50/100 messages and comparing against 4000.
+fn should_compact_check(c: &mut Criterion) {
+    let mut group = c.benchmark_group("should_compact");
+    for count in [20, 50, 100] {
+        let history = generate_messages(count, 200);
+        let id = BenchmarkId::new("messages", count);
+        group.bench_with_input(id, &history, |bencher, batch| {
+            bencher.iter(|| {
+                let total = batch.iter().fold(0usize, |sum, msg| sum + estimate_tokens(msg));
+                black_box(total > 4000)
+            });
+        });
+    }
+    group.finish();
+}
+
+/// Benchmarks a back-to-front scan that accumulates `estimate_tokens` up to a 2000-token budget.
+fn trim_budget_scan(c: &mut Criterion) {
+    const BUDGET: usize = 2000;
+    let mut group = c.benchmark_group("trim_budget_scan");
+    for count in [20, 50, 100] {
+        let history = generate_messages(count, 200);
+        let id = BenchmarkId::new("messages", count);
+        group.bench_with_input(id, &history, |bencher, batch| {
+            bencher.iter(|| {
+                // `first_kept` ends as the index of the earliest message that still fits;
+                // it stays at `len()` when the last message alone already exceeds the budget.
+                let (mut used, mut first_kept) = (0usize, batch.len());
+                for (idx, message) in batch.iter().enumerate().rev() {
+                    let cost = estimate_tokens(message);
+                    if used + cost > BUDGET {
+                        break;
+                    }
+                    (used, first_kept) = (used + cost, idx);
+                }
+                black_box((first_kept, used))
+            });
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks flattening histories of 10, 30 and 50 messages into a single string,
+/// with consecutive messages separated by a blank line (`"\n\n"`).
+fn history_formatting(c: &mut Criterion) {
+    let mut group = c.benchmark_group("history_formatting");
+
+    for count in [10, 30, 50] {
+        let history = generate_messages(count, 200);
+        let id = BenchmarkId::new("messages", count);
+
+        group.bench_with_input(id, &history, |bencher, batch| {
+            bencher.iter(|| {
+                // The `Vec<&str>` is built inside the timed closure, so it is measured too.
+                let parts: Vec<&str> = batch.iter().map(String::as_str).collect();
+                black_box(parts.join("\n\n"))
+            });
+        });
+    }
+
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    should_compact_check,
+    trim_budget_scan,
+    history_formatting
+);
+criterion_main!(benches);
diff --git a/crates/zeph-memory/Cargo.toml b/crates/zeph-memory/Cargo.toml
index 28b291f7..e8cb8a4b 100644
--- a/crates/zeph-memory/Cargo.toml
+++ b/crates/zeph-memory/Cargo.toml
@@ -16,8 +16,13 @@ tracing.workspace = true
 uuid = { workspace = true, features = ["v4"] }
 zeph-llm.workspace = true
 
+[[bench]]
+name = "token_estimation"
+harness = false
+
 [dev-dependencies]
 anyhow.workspace = true
+criterion.workspace = true
 testcontainers.workspace = true
 tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
 tokio-stream.workspace = true
diff --git a/crates/zeph-memory/benches/token_estimation.rs b/crates/zeph-memory/benches/token_estimation.rs
new file mode 100644
index 00000000..388a81a1
--- /dev/null
+++ b/crates/zeph-memory/benches/token_estimation.rs
@@ -0,0 +1,67 @@
+use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};
+use std::hint::black_box;
+use zeph_memory::estimate_tokens;
+
+/// Returns exactly `size` bytes of ASCII filler, cycling through one fixed paragraph.
+fn generate_text(size: usize) -> String {
+    const PARAGRAPH: &str = "The quick brown fox jumps over the lazy dog. This sentence contains various English words and punctuation marks.\n";
+    PARAGRAPH.chars().cycle().take(size).collect()
+}
+
+/// Benchmarks `estimate_tokens` on 1 kB, 10 kB and 100 kB of ASCII text with byte throughput.
+fn token_estimation(c: &mut Criterion) {
+    let mut group = c.benchmark_group("estimate_tokens");
+    for size in [1_000, 10_000, 100_000] {
+        let text = generate_text(size);
+        let id = BenchmarkId::new("ascii", size);
+        group.throughput(Throughput::Bytes(size as u64));
+        group.bench_with_input(id, &text, |bencher, text| {
+            bencher.iter(|| estimate_tokens(black_box(text)));
+        });
+    }
+    group.finish();
+}
+
+/// Benchmarks `estimate_tokens` on multi-byte text (Cyrillic, CJK, kana, emoji) of about
+/// 1, 10 and 100 kB, labelled and throughput-scaled by the actual byte length.
+fn token_estimation_unicode(c: &mut Criterion) {
+    let mut group = c.benchmark_group("estimate_tokens_unicode");
+
+    let pattern = "Привет мир! 你好世界! こんにちは世界! 🌍🌎🌏 ";
+    for size in [1_000, 10_000, 100_000] {
+        let mut text = pattern.repeat(size / pattern.len() + 1);
+        // Trim to at most `size` bytes without splitting a multi-byte character.
+        let cut = text.floor_char_boundary(size);
+        text.truncate(cut);
+        let actual_len = text.len();
+        let id = BenchmarkId::new("unicode", actual_len);
+        group.throughput(Throughput::Bytes(actual_len as u64));
+        group.bench_with_input(id, &text, |bencher, text| {
+            bencher.iter(|| estimate_tokens(black_box(text)));
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks summing `estimate_tokens` over 50 short messages that share one 200-byte body.
+fn token_estimation_batch(c: &mut Criterion) {
+    let mut group = c.benchmark_group("estimate_tokens_batch");
+    let body = generate_text(200);
+    let messages: Vec<String> = (0..50).map(|i| format!("Message {i}: {body}")).collect();
+
+    group.bench_function("50_messages_sum", |b| {
+        // `black_box` hides the captured batch's contents from the optimizer.
+        let batch = black_box(&messages);
+        b.iter(|| black_box(batch.iter().map(|m| estimate_tokens(m)).sum::<usize>()));
+    });
+    group.finish();
+}
+
+criterion_group!(
+    benches,
+    token_estimation,
+    token_estimation_unicode,
+    token_estimation_batch
+);
+criterion_main!(benches);
diff --git a/crates/zeph-skills/Cargo.toml b/crates/zeph-skills/Cargo.toml
index de419e20..39f4a042 100644
--- a/crates/zeph-skills/Cargo.toml
+++ b/crates/zeph-skills/Cargo.toml
@@ -25,8 +25,13 @@ uuid = { workspace = true, optional = true, features = ["v5"] }
 zeph-llm.workspace = true
 zeph-memory = { workspace = true, optional = true }
 
+[[bench]]
+name = "matcher"
+harness = false
+
 [dev-dependencies]
 anyhow.workspace = true
+criterion.workspace = true
 tempfile.workspace = true
 tokio = { workspace = true, features = ["macros", "rt-multi-thread", "time"] }
 
diff --git a/crates/zeph-skills/benches/matcher.rs b/crates/zeph-skills/benches/matcher.rs
new file mode 100644
index 00000000..c50201a3
--- /dev/null
+++ b/crates/zeph-skills/benches/matcher.rs
@@ -0,0 +1,50 @@
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+use std::hint::black_box;
+use zeph_skills::matcher::cosine_similarity;
+
+fn generate_vector(dim: usize, seed: f32) -> Vec<f32> {
+    (0..dim).map(|i| f32::sin(0.1 * (i as f32 + seed))).collect() // deterministic sine samples
+}
+
+/// Benchmarks a single `cosine_similarity` call for vector lengths 128, 384, 768 and 1536.
+fn cosine_similarity_bench(c: &mut Criterion) {
+    let mut group = c.benchmark_group("cosine_similarity");
+    for dim in [128, 384, 768, 1536] {
+        let (lhs, rhs) = (generate_vector(dim, 1.0), generate_vector(dim, 2.0));
+        // The vectors are captured by the closure, so no separate benchmark input is passed.
+        group.bench_function(BenchmarkId::new("dim", dim), |bencher| {
+            bencher.iter(|| cosine_similarity(black_box(&lhs), black_box(&rhs)));
+        });
+    }
+
+    group.finish();
+}
+
+/// Benchmarks scoring 10, 50 and 100 candidate vectors (384 components each) against one query
+/// and sorting the `(index, score)` pairs by descending `cosine_similarity`.
+fn cosine_ranking(c: &mut Criterion) {
+    let mut group = c.benchmark_group("cosine_ranking");
+    // The query does not depend on the candidate count, so build it once.
+    let query = generate_vector(384, 0.0);
+    for count in [10, 50, 100] {
+        let candidates: Vec<Vec<f32>> =
+            (0..count).map(|i| generate_vector(384, i as f32)).collect();
+        group.bench_function(BenchmarkId::new("candidates", count), |bencher| {
+            // `black_box` hides the captured vectors' contents from the optimizer.
+            let (query, candidates) = (black_box(&query), black_box(&candidates));
+            bencher.iter(|| {
+                let scores = candidates.iter().map(|emb| cosine_similarity(query, emb));
+                let mut scored: Vec<(usize, f32)> = scores.enumerate().collect();
+                // Highest score first; incomparable (NaN) scores count as equal.
+                scored.sort_unstable_by(|x, y| {
+                    y.1.partial_cmp(&x.1).unwrap_or(std::cmp::Ordering::Equal)
+                });
+                black_box(scored)
+            });
+        });
+    }
+    group.finish();
+}
+
+criterion_group!(benches, cosine_similarity_bench, cosine_ranking);
+criterion_main!(benches);
diff --git a/crates/zeph-skills/src/matcher.rs b/crates/zeph-skills/src/matcher.rs
index 38600c68..e6b80fac 100644
--- a/crates/zeph-skills/src/matcher.rs
+++ b/crates/zeph-skills/src/matcher.rs
@@ -142,7 +142,8 @@ impl SkillMatcherBackend {
     }
 }
 
-fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
+#[must_use]
+pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
     if a.len() != b.len() || a.is_empty() {
         return 0.0;
     }