Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
- TUI test automation infrastructure: EventSource trait abstraction, insta widget snapshot tests, TestBackend integration tests, proptest layout verification, expectrl E2E terminal tests (#542)
- CI snapshot regression pipeline with `cargo insta test --check` (#547)
- Pipeline API with composable, type-safe `Step` trait, `Pipeline` builder, `ParallelStep` combinator, and built-in steps (`LlmStep`, `RetrievalStep`, `ExtractStep`, `MapStep`) (#466, #467, #468)
- Structured intent classification for skill disambiguation: when top-2 skill scores are within `disambiguation_threshold` (default 0.05), agent calls LLM via `chat_typed::<IntentClassification>()` to select the best-matching skill (#550)
- `ScoredMatch` struct exposing both skill index and cosine similarity score from matcher backends
- `IntentClassification` type (`skill_name`, `confidence`, `params`) with `JsonSchema` derive for schema-enforced LLM responses
- `disambiguation_threshold` in `[skills]` config section (default: 0.05) with `with_disambiguation_threshold()` builder on `Agent`

## [0.10.0] - 2026-02-18

Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ This is the core idea behind Zeph. Every byte that enters the LLM context window

Most frameworks inject all tool descriptions into every prompt. 50 tools installed? 50 descriptions in every request.

Zeph embeds skills and MCP tools as vectors at startup (concurrent embedding via `buffer_unordered`), then retrieves only the **top-K relevant** per query via cosine similarity. Install 500 skills — the prompt sees only the 5 that matter. [How skills work →](https://bug-ops.github.io/zeph/guide/skills.html)
Zeph embeds skills and MCP tools as vectors at startup (concurrent embedding via `buffer_unordered`), then retrieves only the **top-K relevant** per query via cosine similarity. Install 500 skills — the prompt sees only the 5 that matter.

When two candidates score within a configurable threshold of each other, structured intent classification resolves the ambiguity: the agent calls the LLM with a typed `IntentClassification` schema and reorders candidates accordingly — no hallucination, no guessing. [How skills work →](https://bug-ops.github.io/zeph/guide/skills.html)

### Smart Output Filtering — 70-99% Token Savings

Expand Down
251 changes: 249 additions & 2 deletions crates/zeph-core/src/agent/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ use std::fmt::Write;

use zeph_llm::provider::MessagePart;
use zeph_memory::semantic::estimate_tokens;
use zeph_skills::ScoredMatch;
use zeph_skills::loader::SkillMeta;
use zeph_skills::prompt::format_skills_catalog;

use super::{
Expand Down Expand Up @@ -628,12 +630,65 @@ impl<C: Channel, T: ToolExecutor> Agent<C, T> {
Ok(())
}

/// Asks the LLM to pick the best-matching skill when the top candidates
/// scored too close to call, returning the candidate indices reordered so
/// the chosen skill is first. Returns `None` when classification fails,
/// signalling the caller to keep the original score order.
async fn disambiguate_skills(
    &self,
    query: &str,
    all_meta: &[&SkillMeta],
    scored: &[ScoredMatch],
) -> Option<Vec<usize>> {
    // Render each candidate as one bullet line for the prompt; unknown
    // indices are silently skipped, matching the lookup below.
    let mut candidates = String::new();
    for entry in scored {
        let Some(meta) = all_meta.get(entry.index) else {
            continue;
        };
        let _ = writeln!(
            candidates,
            "- {} (score: {:.3}): {}",
            meta.name, entry.score, meta.description
        );
    }

    let prompt = format!(
        "The user said: \"{query}\"\n\n\
         These skills matched with similar scores:\n{candidates}\n\
         Which skill best matches the user's intent? \
         Return the skill_name, your confidence (0-1), and any extracted parameters."
    );

    // Schema-enforced response: the provider must return a value matching
    // the `IntentClassification` JSON schema.
    let messages = vec![Message::from_legacy(Role::User, prompt)];
    let classification = match self
        .provider
        .chat_typed::<zeph_skills::IntentClassification>(&messages)
        .await
    {
        Ok(c) => c,
        Err(e) => {
            tracing::warn!("disambiguation failed, using original order: {e:#}");
            return None;
        }
    };

    tracing::info!(
        skill = %classification.skill_name,
        confidence = classification.confidence,
        "disambiguation selected skill"
    );

    // Promote the chosen skill to the front; an unrecognized name leaves
    // the original order untouched.
    let mut indices: Vec<usize> = scored.iter().map(|s| s.index).collect();
    let chosen = indices.iter().position(|&i| {
        all_meta
            .get(i)
            .is_some_and(|m| m.name == classification.skill_name)
    });
    if let Some(pos) = chosen {
        indices.swap(0, pos);
    }
    Some(indices)
}

#[allow(clippy::too_many_lines)]
pub(super) async fn rebuild_system_prompt(&mut self, query: &str) {
let all_meta = self.skill_state.registry.all_meta();
let matched_indices: Vec<usize> = if let Some(matcher) = &self.skill_state.matcher {
let provider = self.provider.clone();
matcher
let scored = matcher
.match_skills(
&all_meta,
query,
Expand All @@ -644,7 +699,18 @@ impl<C: Channel, T: ToolExecutor> Agent<C, T> {
Box::pin(async move { p.embed(&owned).await })
},
)
.await
.await;

if scored.len() >= 2
&& (scored[0].score - scored[1].score) < self.skill_state.disambiguation_threshold
{
match self.disambiguate_skills(query, &all_meta, &scored).await {
Some(reordered) => reordered,
None => scored.iter().map(|s| s.index).collect(),
}
} else {
scored.iter().map(|s| s.index).collect()
}
} else {
(0..all_meta.len()).collect()
};
Expand Down Expand Up @@ -1670,4 +1736,185 @@ mod tests {
panic!("expected ToolResult");
}
}

#[tokio::test]
async fn disambiguate_skills_reorders_on_match() {
    // The mocked LLM selects the second-ranked skill by name; the
    // returned index list must be reordered so it comes first.
    let json = r#"{"skill_name":"beta_skill","confidence":0.9,"params":{}}"#;
    let agent = Agent::new(
        mock_provider(vec![json.to_string()]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    // Small factory to keep the two fixture skills terse.
    let make_meta = |name: &str, desc: &str| SkillMeta {
        name: name.into(),
        description: desc.into(),
        compatibility: None,
        license: None,
        metadata: Vec::new(),
        allowed_tools: Vec::new(),
        skill_dir: std::path::PathBuf::new(),
    };
    let metas = vec![
        make_meta("alpha_skill", "does alpha"),
        make_meta("beta_skill", "does beta"),
    ];
    let refs: Vec<&SkillMeta> = metas.iter().collect();
    let scored = vec![
        ScoredMatch { index: 0, score: 0.90 },
        ScoredMatch { index: 1, score: 0.88 },
    ];

    let indices = agent
        .disambiguate_skills("do beta stuff", &refs, &scored)
        .await
        .expect("classification should succeed");
    // beta_skill (index 1) moved to front.
    assert_eq!(indices[0], 1);
}

#[tokio::test]
async fn disambiguate_skills_returns_none_on_error() {
    // A failing provider must not abort skill matching: the method
    // signals fallback to the original score order via `None`.
    let agent = Agent::new(
        mock_provider_failing(),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    let metas = vec![SkillMeta {
        name: "test".into(),
        description: "test".into(),
        compatibility: None,
        license: None,
        metadata: Vec::new(),
        allowed_tools: Vec::new(),
        skill_dir: std::path::PathBuf::new(),
    }];
    let refs: Vec<&SkillMeta> = metas.iter().collect();
    let scored = vec![ScoredMatch { index: 0, score: 0.5 }];

    assert!(
        agent
            .disambiguate_skills("query", &refs, &scored)
            .await
            .is_none()
    );
}

#[tokio::test]
async fn disambiguate_skills_empty_candidates() {
    // With no candidates at all the method still returns `Some`, just
    // with an empty index list — the LLM answer has nothing to reorder.
    let json = r#"{"skill_name":"none","confidence":0.1,"params":{}}"#;
    let agent = Agent::new(
        mock_provider(vec![json.to_string()]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    let refs: Vec<&SkillMeta> = Vec::new();
    let scored: Vec<ScoredMatch> = Vec::new();

    let indices = agent
        .disambiguate_skills("query", &refs, &scored)
        .await
        .expect("empty candidate list still yields Some");
    assert!(indices.is_empty());
}

#[tokio::test]
async fn disambiguate_skills_unknown_skill_preserves_order() {
    // The mocked LLM names a skill that does not exist; no candidate
    // matches, so the original score order must be preserved.
    let json = r#"{"skill_name":"nonexistent","confidence":0.5,"params":{}}"#;
    let agent = Agent::new(
        mock_provider(vec![json.to_string()]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    let make_meta = |name: &str, desc: &str| SkillMeta {
        name: name.into(),
        description: desc.into(),
        compatibility: None,
        license: None,
        metadata: Vec::new(),
        allowed_tools: Vec::new(),
        skill_dir: std::path::PathBuf::new(),
    };
    let metas = vec![
        make_meta("first", "first skill"),
        make_meta("second", "second skill"),
    ];
    let refs: Vec<&SkillMeta> = metas.iter().collect();
    let scored = vec![
        ScoredMatch { index: 0, score: 0.9 },
        ScoredMatch { index: 1, score: 0.88 },
    ];

    let result = agent
        .disambiguate_skills("query", &refs, &scored)
        .await
        .unwrap();
    // No swap since the LLM returned an unknown name.
    assert_eq!(result, vec![0, 1]);
}

#[tokio::test]
async fn disambiguate_single_candidate_no_swap() {
    // A single candidate is a degenerate case: the winner is already at
    // position 0, so `swap(0, 0)` is a no-op and the list is unchanged.
    let json = r#"{"skill_name":"only_skill","confidence":0.95,"params":{}}"#;
    let agent = Agent::new(
        mock_provider(vec![json.to_string()]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    );

    let metas = vec![SkillMeta {
        name: "only_skill".into(),
        description: "the only one".into(),
        compatibility: None,
        license: None,
        metadata: Vec::new(),
        allowed_tools: Vec::new(),
        skill_dir: std::path::PathBuf::new(),
    }];
    let refs: Vec<&SkillMeta> = metas.iter().collect();
    let scored = vec![ScoredMatch { index: 0, score: 0.95 }];

    let result = agent
        .disambiguate_skills("query", &refs, &scored)
        .await
        .unwrap();
    assert_eq!(result, vec![0]);
}
}
9 changes: 9 additions & 0 deletions crates/zeph-core/src/agent/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ pub(super) struct SkillState {
pub(super) skill_paths: Vec<PathBuf>,
pub(super) matcher: Option<SkillMatcherBackend>,
pub(super) max_active_skills: usize,
pub(super) disambiguation_threshold: f32,
pub(super) embedding_model: String,
pub(super) skill_reload_rx: Option<mpsc::Receiver<SkillEvent>>,
pub(super) active_skill_names: Vec<String>,
Expand Down Expand Up @@ -182,6 +183,7 @@ impl<C: Channel, T: ToolExecutor> Agent<C, T> {
skill_paths: Vec::new(),
matcher,
max_active_skills,
disambiguation_threshold: 0.05,
embedding_model: String::new(),
skill_reload_rx: None,
active_skill_names: Vec::new(),
Expand Down Expand Up @@ -267,6 +269,12 @@ impl<C: Channel, T: ToolExecutor> Agent<C, T> {
self
}

/// Sets the score gap below which the top-2 skill matches are treated as
/// ambiguous, triggering LLM-based intent classification.
///
/// Overrides the built-in default of `0.05`; the value is also settable
/// via the `[skills] disambiguation_threshold` config key, which is
/// applied when the agent is configured from file.
#[must_use]
pub fn with_disambiguation_threshold(mut self, threshold: f32) -> Self {
    self.skill_state.disambiguation_threshold = threshold;
    self
}

#[must_use]
pub fn with_shutdown(mut self, rx: watch::Receiver<bool>) -> Self {
self.shutdown = rx;
Expand Down Expand Up @@ -858,6 +866,7 @@ impl<C: Channel, T: ToolExecutor> Agent<C, T> {
self.memory_state.recall_limit = config.memory.semantic.recall_limit;
self.memory_state.summarization_threshold = config.memory.summarization_threshold;
self.skill_state.max_active_skills = config.skills.max_active_skills;
self.skill_state.disambiguation_threshold = config.skills.disambiguation_threshold;

if config.memory.context_budget_tokens > 0 {
self.context_state.budget = Some(ContextBudget::new(
Expand Down
7 changes: 7 additions & 0 deletions crates/zeph-core/src/config/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -262,12 +262,18 @@ pub struct SkillsConfig {
pub paths: Vec<String>,
#[serde(default = "default_max_active_skills")]
pub max_active_skills: usize,
#[serde(default = "default_disambiguation_threshold")]
pub disambiguation_threshold: f32,
#[serde(default)]
pub learning: LearningConfig,
#[serde(default)]
pub trust: TrustConfig,
}

/// Serde default for `SkillsConfig::disambiguation_threshold`.
///
/// NOTE(review): the same `0.05` is hard-coded as the initial value in the
/// agent's `SkillState` — keep the two in sync if either changes.
fn default_disambiguation_threshold() -> f32 {
    0.05
}

#[derive(Debug, Clone, Deserialize)]
pub struct TrustConfig {
#[serde(default = "default_trust_default_level")]
Expand Down Expand Up @@ -967,6 +973,7 @@ impl Config {
skills: SkillsConfig {
paths: vec!["./skills".into()],
max_active_skills: default_max_active_skills(),
disambiguation_threshold: default_disambiguation_threshold(),
learning: LearningConfig::default(),
trust: TrustConfig::default(),
},
Expand Down
2 changes: 1 addition & 1 deletion crates/zeph-llm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ futures-core.workspace = true
hf-hub = { workspace = true, optional = true }
ollama-rs.workspace = true
reqwest = { workspace = true, features = ["json", "rustls", "stream"] }
serde = { workspace = true, features = ["derive"] }
schemars.workspace = true
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
tokenizers = { workspace = true, optional = true }
tokio = { workspace = true, features = ["rt", "sync", "time"] }
Expand Down
1 change: 1 addition & 0 deletions crates/zeph-skills/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ qdrant-client = { workspace = true, features = ["serde"] }
serde = { workspace = true, features = ["derive"] }
serde_json.workspace = true
futures.workspace = true
schemars.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["sync", "rt", "time"] }
tracing.workspace = true
Expand Down
1 change: 1 addition & 0 deletions crates/zeph-skills/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ pub mod trust;
pub mod watcher;

pub use error::SkillError;
pub use matcher::{IntentClassification, ScoredMatch};
pub use trust::{SkillSource, SkillTrust, TrustLevel, compute_skill_hash};
Loading
Loading