Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
- TUI test automation infrastructure: EventSource trait abstraction, insta widget snapshot tests, TestBackend integration tests, proptest layout verification, expectrl E2E terminal tests (#542)
- CI snapshot regression pipeline with `cargo insta test --check` (#547)
- Pipeline API with composable, type-safe `Step` trait, `Pipeline` builder, `ParallelStep` combinator, and built-in steps (`LlmStep`, `RetrievalStep`, `ExtractStep`, `MapStep`) (#466, #467, #468)
- Structured intent classification for skill disambiguation: when top-2 skill scores are within `disambiguation_threshold` (default 0.05), agent calls LLM via `chat_typed::<IntentClassification>()` to select the best-matching skill (#550)
- `ScoredMatch` struct exposing both skill index and cosine similarity score from matcher backends
- `IntentClassification` type (`skill_name`, `confidence`, `params`) with `JsonSchema` derive for schema-enforced LLM responses
- `disambiguation_threshold` in `[skills]` config section (default: 0.05) with `with_disambiguation_threshold()` builder on `Agent`

## [0.10.0] - 2026-02-18

Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ This is the core idea behind Zeph. Every byte that enters the LLM context window

Most frameworks inject all tool descriptions into every prompt. 50 tools installed? 50 descriptions in every request.

Zeph embeds skills and MCP tools as vectors at startup (concurrent embedding via `buffer_unordered`), then retrieves only the **top-K relevant** per query via cosine similarity. Install 500 skills — the prompt sees only the 5 that matter. [How skills work →](https://bug-ops.github.io/zeph/guide/skills.html)
Zeph embeds skills and MCP tools as vectors at startup (concurrent embedding via `buffer_unordered`), then retrieves only the **top-K relevant** per query via cosine similarity. Install 500 skills — the prompt sees only the 5 that matter.

When two candidates score within a configurable threshold of each other, structured intent classification resolves the ambiguity: the agent calls the LLM with a typed `IntentClassification` schema and reorders candidates accordingly — no hallucination, no guessing. [How skills work →](https://bug-ops.github.io/zeph/guide/skills.html)

### Smart Output Filtering — 70-99% Token Savings

Expand Down
251 changes: 249 additions & 2 deletions crates/zeph-core/src/agent/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ use std::fmt::Write;

use zeph_llm::provider::MessagePart;
use zeph_memory::semantic::estimate_tokens;
use zeph_skills::ScoredMatch;
use zeph_skills::loader::SkillMeta;
use zeph_skills::prompt::format_skills_catalog;

use super::{
Expand Down Expand Up @@ -628,12 +630,65 @@ impl<C: Channel, T: ToolExecutor> Agent<C, T> {
Ok(())
}

/// Asks the LLM to pick the best-matching skill when the top candidates
/// scored too close to call, returning the candidate indices reordered so
/// the chosen skill is first. Returns `None` when classification fails,
/// signalling the caller to keep the original score order.
async fn disambiguate_skills(
    &self,
    query: &str,
    all_meta: &[&SkillMeta],
    scored: &[ScoredMatch],
) -> Option<Vec<usize>> {
    // Render each candidate as one bullet line for the prompt; unknown
    // indices are silently skipped, matching the lookup below.
    let mut candidates = String::new();
    for entry in scored {
        let Some(meta) = all_meta.get(entry.index) else {
            continue;
        };
        let _ = writeln!(
            candidates,
            "- {} (score: {:.3}): {}",
            meta.name, entry.score, meta.description
        );
    }

    let prompt = format!(
        "The user said: \"{query}\"\n\n\
         These skills matched with similar scores:\n{candidates}\n\
         Which skill best matches the user's intent? \
         Return the skill_name, your confidence (0-1), and any extracted parameters."
    );

    // Schema-enforced response: the provider must return a value matching
    // the `IntentClassification` JSON schema.
    let messages = vec![Message::from_legacy(Role::User, prompt)];
    let classification = match self
        .provider
        .chat_typed::<zeph_skills::IntentClassification>(&messages)
        .await
    {
        Ok(c) => c,
        Err(e) => {
            tracing::warn!("disambiguation failed, using original order: {e:#}");
            return None;
        }
    };

    tracing::info!(
        skill = %classification.skill_name,
        confidence = classification.confidence,
        "disambiguation selected skill"
    );

    // Promote the chosen skill to the front; an unrecognized name leaves
    // the original order untouched.
    let mut indices: Vec<usize> = scored.iter().map(|s| s.index).collect();
    let chosen = indices.iter().position(|&i| {
        all_meta
            .get(i)
            .is_some_and(|m| m.name == classification.skill_name)
    });
    if let Some(pos) = chosen {
        indices.swap(0, pos);
    }
    Some(indices)
}

#[allow(clippy::too_many_lines)]
pub(super) async fn rebuild_system_prompt(&mut self, query: &str) {
let all_meta = self.skill_state.registry.all_meta();
let matched_indices: Vec<usize> = if let Some(matcher) = &self.skill_state.matcher {
let provider = self.provider.clone();
matcher
let scored = matcher
.match_skills(
&all_meta,
query,
Expand All @@ -644,7 +699,18 @@ impl<C: Channel, T: ToolExecutor> Agent<C, T> {
Box::pin(async move { p.embed(&owned).await })
},
)
.await
.await;

if scored.len() >= 2
&& (scored[0].score - scored[1].score) < self.skill_state.disambiguation_threshold
{
match self.disambiguate_skills(query, &all_meta, &scored).await {
Some(reordered) => reordered,
None => scored.iter().map(|s| s.index).collect(),
}
} else {
scored.iter().map(|s| s.index).collect()
}
} else {
(0..all_meta.len()).collect()
};
Expand Down Expand Up @@ -1670,4 +1736,185 @@ mod tests {
panic!("expected ToolResult");
}
}

#[tokio::test]
async fn disambiguate_skills_reorders_on_match() {
    // The mocked LLM selects the second-ranked skill by name; the
    // returned index list must be reordered so it comes first.
    let json = r#"{"skill_name":"beta_skill","confidence":0.9,"params":{}}"#;
    let agent = Agent::new(
        mock_provider(vec![json.to_string()]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    // Small factory to keep the two fixture skills terse.
    let make_meta = |name: &str, desc: &str| SkillMeta {
        name: name.into(),
        description: desc.into(),
        compatibility: None,
        license: None,
        metadata: Vec::new(),
        allowed_tools: Vec::new(),
        skill_dir: std::path::PathBuf::new(),
    };
    let metas = vec![
        make_meta("alpha_skill", "does alpha"),
        make_meta("beta_skill", "does beta"),
    ];
    let refs: Vec<&SkillMeta> = metas.iter().collect();
    let scored = vec![
        ScoredMatch { index: 0, score: 0.90 },
        ScoredMatch { index: 1, score: 0.88 },
    ];

    let indices = agent
        .disambiguate_skills("do beta stuff", &refs, &scored)
        .await
        .expect("classification should succeed");
    // beta_skill (index 1) moved to front.
    assert_eq!(indices[0], 1);
}

#[tokio::test]
async fn disambiguate_skills_returns_none_on_error() {
    // A failing provider must not abort skill matching: the method
    // signals fallback to the original score order via `None`.
    let agent = Agent::new(
        mock_provider_failing(),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    let metas = vec![SkillMeta {
        name: "test".into(),
        description: "test".into(),
        compatibility: None,
        license: None,
        metadata: Vec::new(),
        allowed_tools: Vec::new(),
        skill_dir: std::path::PathBuf::new(),
    }];
    let refs: Vec<&SkillMeta> = metas.iter().collect();
    let scored = vec![ScoredMatch { index: 0, score: 0.5 }];

    assert!(
        agent
            .disambiguate_skills("query", &refs, &scored)
            .await
            .is_none()
    );
}

#[tokio::test]
async fn disambiguate_skills_empty_candidates() {
    // With no candidates at all the method still returns `Some`, just
    // with an empty index list — the LLM answer has nothing to reorder.
    let json = r#"{"skill_name":"none","confidence":0.1,"params":{}}"#;
    let agent = Agent::new(
        mock_provider(vec![json.to_string()]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    let refs: Vec<&SkillMeta> = Vec::new();
    let scored: Vec<ScoredMatch> = Vec::new();

    let indices = agent
        .disambiguate_skills("query", &refs, &scored)
        .await
        .expect("empty candidate list still yields Some");
    assert!(indices.is_empty());
}

#[tokio::test]
async fn disambiguate_skills_unknown_skill_preserves_order() {
    // The mocked LLM names a skill that does not exist; no candidate
    // matches, so the original score order must be preserved.
    let json = r#"{"skill_name":"nonexistent","confidence":0.5,"params":{}}"#;
    let agent = Agent::new(
        mock_provider(vec![json.to_string()]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    let make_meta = |name: &str, desc: &str| SkillMeta {
        name: name.into(),
        description: desc.into(),
        compatibility: None,
        license: None,
        metadata: Vec::new(),
        allowed_tools: Vec::new(),
        skill_dir: std::path::PathBuf::new(),
    };
    let metas = vec![
        make_meta("first", "first skill"),
        make_meta("second", "second skill"),
    ];
    let refs: Vec<&SkillMeta> = metas.iter().collect();
    let scored = vec![
        ScoredMatch { index: 0, score: 0.9 },
        ScoredMatch { index: 1, score: 0.88 },
    ];

    let result = agent
        .disambiguate_skills("query", &refs, &scored)
        .await
        .unwrap();
    // No swap since the LLM returned an unknown name.
    assert_eq!(result, vec![0, 1]);
}

#[tokio::test]
async fn disambiguate_single_candidate_no_swap() {
    // A single candidate is a degenerate case: the winner is already at
    // position 0, so `swap(0, 0)` is a no-op and the list is unchanged.
    let json = r#"{"skill_name":"only_skill","confidence":0.95,"params":{}}"#;
    let agent = Agent::new(
        mock_provider(vec![json.to_string()]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    let metas = vec![SkillMeta {
        name: "only_skill".into(),
        description: "the only one".into(),
        compatibility: None,
        license: None,
        metadata: Vec::new(),
        allowed_tools: Vec::new(),
        skill_dir: std::path::PathBuf::new(),
    }];
    let refs: Vec<&SkillMeta> = metas.iter().collect();
    let scored = vec![ScoredMatch { index: 0, score: 0.95 }];

    let result = agent
        .disambiguate_skills("query", &refs, &scored)
        .await
        .unwrap();
    assert_eq!(result, vec![0]);
}
}
9 changes: 9 additions & 0 deletions crates/zeph-core/src/agent/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ pub(super) struct SkillState {
pub(super) skill_paths: Vec<PathBuf>,
pub(super) matcher: Option<SkillMatcherBackend>,
pub(super) max_active_skills: usize,
pub(super) disambiguation_threshold: f32,
pub(super) embedding_model: String,
pub(super) skill_reload_rx: Option<mpsc::Receiver<SkillEvent>>,
pub(super) active_skill_names: Vec<String>,
Expand Down Expand Up @@ -182,6 +183,7 @@ impl<C: Channel, T: ToolExecutor> Agent<C, T> {
skill_paths: Vec::new(),
matcher,
max_active_skills,
disambiguation_threshold: 0.05,
embedding_model: String::new(),
skill_reload_rx: None,
active_skill_names: Vec::new(),
Expand Down Expand Up @@ -267,6 +269,12 @@ impl<C: Channel, T: ToolExecutor> Agent<C, T> {
self
}

/// Sets the score gap below which the top-2 skill matches are treated as
/// ambiguous, triggering LLM-based intent classification.
///
/// Overrides the built-in default of `0.05`; the value is also settable
/// via the `[skills] disambiguation_threshold` config key, which is
/// applied when the agent is configured from file.
#[must_use]
pub fn with_disambiguation_threshold(mut self, threshold: f32) -> Self {
    self.skill_state.disambiguation_threshold = threshold;
    self
}

#[must_use]
pub fn with_shutdown(mut self, rx: watch::Receiver<bool>) -> Self {
self.shutdown = rx;
Expand Down Expand Up @@ -858,6 +866,7 @@ impl<C: Channel, T: ToolExecutor> Agent<C, T> {
self.memory_state.recall_limit = config.memory.semantic.recall_limit;
self.memory_state.summarization_threshold = config.memory.summarization_threshold;
self.skill_state.max_active_skills = config.skills.max_active_skills;
self.skill_state.disambiguation_threshold = config.skills.disambiguation_threshold;

if config.memory.context_budget_tokens > 0 {
self.context_state.budget = Some(ContextBudget::new(
Expand Down
7 changes: 7 additions & 0 deletions crates/zeph-core/src/config/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,12 +262,18 @@ pub struct SkillsConfig {
pub paths: Vec<String>,
#[serde(default = "default_max_active_skills")]
pub max_active_skills: usize,
#[serde(default = "default_disambiguation_threshold")]
pub disambiguation_threshold: f32,
#[serde(default)]
pub learning: LearningConfig,
#[serde(default)]
pub trust: TrustConfig,
}

/// Serde default for `SkillsConfig::disambiguation_threshold`.
///
/// NOTE(review): the same `0.05` is hard-coded as the initial value in the
/// agent's `SkillState` — keep the two in sync if either changes.
fn default_disambiguation_threshold() -> f32 {
    0.05
}

#[derive(Debug, Clone, Deserialize)]
pub struct TrustConfig {
#[serde(default = "default_trust_default_level")]
Expand Down Expand Up @@ -967,6 +973,7 @@ impl Config {
skills: SkillsConfig {
paths: vec!["./skills".into()],
max_active_skills: default_max_active_skills(),
disambiguation_threshold: default_disambiguation_threshold(),
learning: LearningConfig::default(),
trust: TrustConfig::default(),
},
Expand Down
2 changes: 1 addition & 1 deletion crates/zeph-llm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ futures-core.workspace = true
hf-hub = { workspace = true, optional = true }
ollama-rs.workspace = true
reqwest = { workspace = true, features = ["json", "rustls", "stream"] }
serde = { workspace = true, features = ["derive"] }
schemars.workspace = true
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
tokenizers = { workspace = true, optional = true }
tokio = { workspace = true, features = ["rt", "sync", "time"] }
Expand Down
1 change: 1 addition & 0 deletions crates/zeph-skills/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ qdrant-client = { workspace = true, features = ["serde"] }
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
futures.workspace = true
schemars.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["sync", "rt", "time"] }
tracing.workspace = true
Expand Down
1 change: 1 addition & 0 deletions crates/zeph-skills/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ pub mod trust;
pub mod watcher;

pub use error::SkillError;
pub use matcher::{IntentClassification, ScoredMatch};
pub use trust::{SkillSource, SkillTrust, TrustLevel, compute_skill_hash};
Loading
Loading