diff --git a/CHANGELOG.md b/CHANGELOG.md index ed542846..a51bde69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] ### Added +- Skill trust levels: 4-tier model (Trusted, Verified, Quarantined, Blocked) with per-turn enforcement +- `TrustGateExecutor` wrapping tool execution with trust-level permission checks +- `AnomalyDetector` with sliding-window threshold counters for quarantined skill monitoring +- blake3 content hashing for skill integrity verification on load and hot-reload +- Quarantine prompt wrapping for structural isolation of untrusted skill bodies +- Self-learning gate: skills with trust < Verified skip auto-improvement +- `skill_trust` SQLite table with migration 009 +- CLI commands: `/skill trust`, `/skill block`, `/skill unblock` +- `[skills.trust]` config section (default_level, local_level, hash_mismatch_level) - `ProviderKind` enum for type-safe provider selection in config - `RuntimeConfig` struct grouping agent runtime fields - `AnyProvider::embed_fn()` shared embedding closure helper diff --git a/Cargo.lock b/Cargo.lock index 832d5039..b41c7308 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8516,6 +8516,7 @@ dependencies = [ "notify", "notify-debouncer-mini", "qdrant-client", + "serde", "serde_json", "tempfile", "thiserror 2.0.18", @@ -8546,6 +8547,7 @@ dependencies = [ "tracing", "url", "uuid", + "zeph-skills", ] [[package]] diff --git a/README.md b/README.md index 59bcc12c..3d668c53 100644 --- a/README.md +++ b/README.md @@ -115,11 +115,12 @@ cargo build --release --features tui | **A2A Protocol** | Agent-to-agent communication via JSON-RPC 2.0 with SSE streaming, delegated task inference through agent pipeline | [A2A](https://bug-ops.github.io/zeph/guide/a2a.html) | | **Model Orchestrator** | Route tasks to different providers with fallback chains | [Orchestrator](https://bug-ops.github.io/zeph/guide/orchestrator.html) | | 
**Self-Learning** | Skills evolve via failure detection and LLM-generated improvements | [Self-Learning](https://bug-ops.github.io/zeph/guide/self-learning.html) | +| **Skill Trust & Quarantine** | 4-tier trust model (Trusted/Verified/Quarantined/Blocked) with blake3 integrity verification, anomaly detection with automatic blocking, and restricted tool access for untrusted skills | | | **Prompt Caching** | Automatic prompt caching for Anthropic and OpenAI providers, reducing latency and cost on repeated context | | | **Graceful Shutdown** | Ctrl-C triggers ordered teardown with MCP server cleanup and pending task draining | | | **TUI Dashboard** | ratatui terminal UI with tree-sitter syntax highlighting, markdown rendering, deferred model warmup, scrollbar, mouse scroll, thinking blocks, conversation history, splash screen, live metrics, message queueing (max 10, FIFO with Ctrl+K clear) | [TUI](https://bug-ops.github.io/zeph/guide/tui.html) | | **Multi-Channel I/O** | CLI, Discord, Slack, Telegram, and TUI with streaming support | [Channels](https://bug-ops.github.io/zeph/guide/channels.html) | -| **Defense-in-Depth** | Shell sandbox with relative path traversal detection, file sandbox, command filter, secret redaction (Google/GitLab patterns), audit log, SSRF protection (agent + MCP), rate limiter TTL eviction, doom-loop detection | [Security](https://bug-ops.github.io/zeph/security.html) | +| **Defense-in-Depth** | Shell sandbox with relative path traversal detection, file sandbox, command filter, secret redaction (Google/GitLab patterns), audit log, SSRF protection (agent + MCP), rate limiter TTL eviction, doom-loop detection, skill trust quarantine | [Security](https://bug-ops.github.io/zeph/security.html) | ## Architecture diff --git a/config/default.toml b/config/default.toml index d5bf1c17..d2343106 100644 --- a/config/default.toml +++ b/config/default.toml @@ -100,6 +100,14 @@ max_versions = 10 # Cooldown between improvements for same skill (minutes) 
cooldown_minutes = 60 +[skills.trust] +# Default trust level for newly discovered skills: trusted, verified, quarantined, blocked +default_level = "quarantined" +# Trust level assigned to local (built-in) skills +local_level = "trusted" +# Trust level after blake3 hash mismatch on hot-reload +hash_mismatch_level = "quarantined" + [memory] # SQLite database path for conversation history sqlite_path = "./data/zeph.db" diff --git a/crates/zeph-core/src/agent/context.rs b/crates/zeph-core/src/agent/context.rs index ad7a4362..0886477e 100644 --- a/crates/zeph-core/src/agent/context.rs +++ b/crates/zeph-core/src/agent/context.rs @@ -699,7 +699,8 @@ impl Agent { .cloned() .collect(); - let skills_prompt = format_skills_prompt(&active_skills, std::env::consts::OS); + let trust_map = self.build_skill_trust_map().await; + let skills_prompt = format_skills_prompt(&active_skills, std::env::consts::OS, &trust_map); let catalog_prompt = format_skills_catalog(&remaining_skills); self.skill_state .last_skills_prompt diff --git a/crates/zeph-core/src/agent/learning.rs b/crates/zeph-core/src/agent/learning.rs index c7292692..88c0a62a 100644 --- a/crates/zeph-core/src/agent/learning.rs +++ b/crates/zeph-core/src/agent/learning.rs @@ -10,6 +10,17 @@ impl Agent { self.learning_config.as_ref().is_some_and(|c| c.enabled) } + #[cfg(feature = "self-learning")] + async fn is_skill_trusted_for_learning(&self, skill_name: &str) -> bool { + let Some(memory) = &self.memory_state.memory else { + return true; + }; + let Ok(Some(row)) = memory.sqlite().load_skill_trust(skill_name).await else { + return true; // no trust record = local skill = trusted; NOTE(review): a failed lookup (Err) also takes this branch, i.e. fail-open - confirm intended + }; + matches!(row.trust_level.as_str(), "trusted" | "verified") + } + #[cfg(not(feature = "self-learning"))] #[allow(dead_code, clippy::unused_self)] pub(super) fn is_learning_enabled(&self) -> bool { @@ -66,6 +77,10 @@ impl Agent { return Ok(false); }; + if !self.is_skill_trusted_for_learning(&name).await { + return Ok(false); + } + let Ok(skill) = 
self.skill_state.registry.get_skill(&name) else { return Ok(false); }; @@ -117,6 +132,9 @@ impl Agent { if !self.is_learning_enabled() { return Ok(()); } + if !self.is_skill_trusted_for_learning(skill_name).await { + return Ok(()); + } let Some(memory) = &self.memory_state.memory else { return Ok(()); @@ -378,9 +396,12 @@ impl Agent { } Some("approve") => self.handle_skill_approve(parts.get(1).copied()).await, Some("reset") => self.handle_skill_reset(parts.get(1).copied()).await, + Some("trust") => self.handle_skill_trust_command(&parts[1..]).await, + Some("block") => self.handle_skill_block(parts.get(1).copied()).await, + Some("unblock") => self.handle_skill_unblock(parts.get(1).copied()).await, _ => { self.channel - .send("Unknown /skill subcommand. Available: stats, versions, activate, approve, reset") + .send("Unknown /skill subcommand. Available: stats, versions, activate, approve, reset, trust, block, unblock") .await?; Ok(()) } @@ -390,12 +411,20 @@ impl Agent { #[cfg(not(feature = "self-learning"))] pub(super) async fn handle_skill_command( &mut self, - _args: &str, + args: &str, ) -> Result<(), super::error::AgentError> { - self.channel - .send("Self-learning feature is not enabled.") - .await?; - Ok(()) + let parts: Vec<&str> = args.split_whitespace().collect(); + match parts.first().copied() { + Some("trust") => self.handle_skill_trust_command(&parts[1..]).await, + Some("block") => self.handle_skill_block(parts.get(1).copied()).await, + Some("unblock") => self.handle_skill_unblock(parts.get(1).copied()).await, + _ => { + self.channel + .send("Available /skill subcommands: trust, block, unblock") + .await?; + Ok(()) + } + } } #[cfg(feature = "self-learning")] diff --git a/crates/zeph-core/src/agent/mod.rs b/crates/zeph-core/src/agent/mod.rs index 1f106b67..a0d19bc9 100644 --- a/crates/zeph-core/src/agent/mod.rs +++ b/crates/zeph-core/src/agent/mod.rs @@ -7,6 +7,7 @@ mod learning; mod mcp; mod persistence; mod streaming; +mod trust_commands; use 
std::collections::VecDeque; use std::path::PathBuf; @@ -17,6 +18,7 @@ use zeph_llm::any::AnyProvider; use zeph_llm::provider::{LlmProvider, Message, Role}; use crate::metrics::MetricsSnapshot; +use std::collections::HashMap; use zeph_memory::semantic::SemanticMemory; use zeph_skills::loader::Skill; use zeph_skills::matcher::{SkillMatcher, SkillMatcherBackend}; @@ -151,7 +153,8 @@ impl Agent { .iter() .filter_map(|m| registry.get_skill(&m.name).ok()) .collect(); - let skills_prompt = format_skills_prompt(&all_skills, std::env::consts::OS); + let empty_trust = HashMap::new(); + let skills_prompt = format_skills_prompt(&all_skills, std::env::consts::OS, &empty_trust); let system_prompt = build_system_prompt(&skills_prompt, None, None, false); tracing::debug!(len = system_prompt.len(), "initial system prompt built"); tracing::trace!(prompt = %system_prompt, "full system prompt"); @@ -679,7 +682,18 @@ impl Agent { let mut output = String::from("Available skills:\n\n"); for meta in self.skill_state.registry.all_meta() { - let _ = writeln!(output, "- {} — {}", meta.name, meta.description); + let trust_info = if let Some(memory) = &self.memory_state.memory { + memory + .sqlite() + .load_skill_trust(&meta.name) + .await + .ok() + .flatten() + .map_or_else(String::new, |r| format!(" [{}]", r.trust_level)) + } else { + String::new() + }; + let _ = writeln!(output, "- {} — {}{trust_info}", meta.name, meta.description); } if let Some(memory) = &self.memory_state.memory { @@ -799,7 +813,8 @@ impl Agent { .iter() .filter_map(|m| self.skill_state.registry.get_skill(&m.name).ok()) .collect(); - let skills_prompt = format_skills_prompt(&all_skills, std::env::consts::OS); + let trust_map = self.build_skill_trust_map().await; + let skills_prompt = format_skills_prompt(&all_skills, std::env::consts::OS, &trust_map); self.skill_state .last_skills_prompt .clone_from(&skills_prompt); diff --git a/crates/zeph-core/src/agent/trust_commands.rs b/crates/zeph-core/src/agent/trust_commands.rs 
new file mode 100644 index 00000000..3f0d3ae0 --- /dev/null +++ b/crates/zeph-core/src/agent/trust_commands.rs @@ -0,0 +1,170 @@ +use std::collections::HashMap; +use std::fmt::Write; + +use zeph_skills::TrustLevel; + +use super::{Agent, Channel, ToolExecutor}; + +impl Agent { + /// Handle `/skill trust [name [level]]`. + pub(super) async fn handle_skill_trust_command( + &mut self, + args: &[&str], + ) -> Result<(), super::error::AgentError> { + let Some(memory) = &self.memory_state.memory else { + self.channel.send("Memory not available.").await?; + return Ok(()); + }; + + match args.first().copied() { + None => { + // List all trust levels + let rows = memory.sqlite().load_all_skill_trust().await?; + if rows.is_empty() { + self.channel.send("No skill trust data recorded.").await?; + return Ok(()); + } + let mut output = String::from("Skill trust levels:\n\n"); + for row in &rows { + let _ = writeln!( + output, + "- {} [{}] (source: {}, hash: {}..)", + row.skill_name, + row.trust_level, + row.source_kind, + &row.blake3_hash[..row.blake3_hash.len().min(8)] + ); + } + self.channel.send(&output).await?; + } + Some(name) => { + if let Some(level_str) = args.get(1).copied() { + // Set trust level + let level = match level_str { + "trusted" => TrustLevel::Trusted, + "verified" => TrustLevel::Verified, + "quarantined" => TrustLevel::Quarantined, + "blocked" => TrustLevel::Blocked, + _ => { + self.channel + .send("Invalid trust level. 
Use: trusted, verified, quarantined, blocked") + .await?; + return Ok(()); + } + }; + let updated = memory + .sqlite() + .set_skill_trust_level(name, &level.to_string()) + .await?; + if updated { + self.channel + .send(&format!("Trust level for \"{name}\" set to {level}.")) + .await?; + } else { + self.channel + .send(&format!("Skill \"{name}\" not found in trust database.")) + .await?; + } + } else { + // Show single skill trust + let row = memory.sqlite().load_skill_trust(name).await?; + match row { + Some(r) => { + self.channel + .send(&format!( + "{}: level={}, source={}, hash={}", + r.skill_name, r.trust_level, r.source_kind, r.blake3_hash + )) + .await?; + } + None => { + self.channel + .send(&format!("No trust data for \"{name}\".")) + .await?; + } + } + } + } + } + Ok(()) + } + + /// Handle `/skill block `. + pub(super) async fn handle_skill_block( + &mut self, + name: Option<&str>, + ) -> Result<(), super::error::AgentError> { + let Some(name) = name else { + self.channel.send("Usage: /skill block ").await?; + return Ok(()); + }; + let Some(memory) = &self.memory_state.memory else { + self.channel.send("Memory not available.").await?; + return Ok(()); + }; + let updated = memory + .sqlite() + .set_skill_trust_level(name, "blocked") + .await?; + if updated { + self.channel + .send(&format!("Skill \"{name}\" blocked.")) + .await?; + } else { + self.channel + .send(&format!("Skill \"{name}\" not found in trust database.")) + .await?; + } + Ok(()) + } + + /// Handle `/skill unblock `. 
+ pub(super) async fn handle_skill_unblock( + &mut self, + name: Option<&str>, + ) -> Result<(), super::error::AgentError> { + let Some(name) = name else { + self.channel.send("Usage: /skill unblock ").await?; + return Ok(()); + }; + let Some(memory) = &self.memory_state.memory else { + self.channel.send("Memory not available.").await?; + return Ok(()); + }; + let updated = memory + .sqlite() + .set_skill_trust_level(name, "quarantined") + .await?; + if updated { + self.channel + .send(&format!("Skill \"{name}\" unblocked (set to quarantined).")) + .await?; + } else { + self.channel + .send(&format!("Skill \"{name}\" not found in trust database.")) + .await?; + } + Ok(()) + } + + pub(super) async fn build_skill_trust_map(&self) -> HashMap { + let Some(memory) = &self.memory_state.memory else { + return HashMap::new(); + }; + let Ok(rows) = memory.sqlite().load_all_skill_trust().await else { + return HashMap::new(); + }; + rows.into_iter() + .filter_map(|r| { + let level = match r.trust_level.as_str() { + "trusted" => TrustLevel::Trusted, + "verified" => TrustLevel::Verified, + "quarantined" => TrustLevel::Quarantined, + "blocked" => TrustLevel::Blocked, + _ => return None, + }; + Some((r.skill_name, level)) + }) + .collect() + } +} diff --git a/crates/zeph-core/src/config/types.rs b/crates/zeph-core/src/config/types.rs index ac2a43e0..8a2ed0da 100644 --- a/crates/zeph-core/src/config/types.rs +++ b/crates/zeph-core/src/config/types.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use serde::Deserialize; +use zeph_skills::TrustLevel; use zeph_tools::{AutonomyLevel, ToolsConfig}; use crate::vault::Secret; @@ -261,6 +262,40 @@ pub struct SkillsConfig { pub max_active_skills: usize, #[serde(default)] pub learning: LearningConfig, + #[serde(default)] + pub trust: TrustConfig, +} + +#[derive(Debug, Clone, Deserialize)] +pub struct TrustConfig { + #[serde(default = "default_trust_default_level")] + pub default_level: TrustLevel, + #[serde(default = 
"default_trust_local_level")] + pub local_level: TrustLevel, + #[serde(default = "default_trust_hash_mismatch_level")] + pub hash_mismatch_level: TrustLevel, +} + +fn default_trust_default_level() -> TrustLevel { + TrustLevel::Quarantined +} + +fn default_trust_local_level() -> TrustLevel { + TrustLevel::Trusted +} + +fn default_trust_hash_mismatch_level() -> TrustLevel { + TrustLevel::Quarantined +} + +impl Default for TrustConfig { + fn default() -> Self { + Self { + default_level: default_trust_default_level(), + local_level: default_trust_local_level(), + hash_mismatch_level: default_trust_hash_mismatch_level(), + } + } } fn default_max_active_skills() -> usize { @@ -918,6 +953,7 @@ impl Config { paths: vec!["./skills".into()], max_active_skills: default_max_active_skills(), learning: LearningConfig::default(), + trust: TrustConfig::default(), }, memory: MemoryConfig { sqlite_path: "./data/zeph.db".into(), diff --git a/crates/zeph-memory/src/sqlite/mod.rs b/crates/zeph-memory/src/sqlite/mod.rs index 42fdfc72..2bac743b 100644 --- a/crates/zeph-memory/src/sqlite/mod.rs +++ b/crates/zeph-memory/src/sqlite/mod.rs @@ -1,6 +1,7 @@ mod messages; mod skills; mod summaries; +mod trust; use sqlx::SqlitePool; use sqlx::sqlite::{SqliteConnectOptions, SqlitePoolOptions}; @@ -10,6 +11,7 @@ use crate::error::MemoryError; pub use messages::role_str; pub use skills::{SkillMetricsRow, SkillUsageRow, SkillVersionRow}; +pub use trust::SkillTrustRow; #[derive(Debug)] pub struct SqliteStore { diff --git a/crates/zeph-memory/src/sqlite/trust.rs b/crates/zeph-memory/src/sqlite/trust.rs new file mode 100644 index 00000000..3a69b675 --- /dev/null +++ b/crates/zeph-memory/src/sqlite/trust.rs @@ -0,0 +1,350 @@ +use super::SqliteStore; +use crate::error::MemoryError; + +#[derive(Debug, Clone)] +pub struct SkillTrustRow { + pub skill_name: String, + pub trust_level: String, + pub source_kind: String, + pub source_url: Option, + pub source_path: Option, + pub blake3_hash: String, + pub 
updated_at: String, +} + +type TrustTuple = ( + String, + String, + String, + Option, + Option, + String, + String, +); + +fn row_from_tuple(t: TrustTuple) -> SkillTrustRow { + SkillTrustRow { + skill_name: t.0, + trust_level: t.1, + source_kind: t.2, + source_url: t.3, + source_path: t.4, + blake3_hash: t.5, + updated_at: t.6, + } +} + +impl SqliteStore { + /// Upsert trust metadata for a skill. + /// + /// # Errors + /// + /// Returns an error if the database operation fails. + pub async fn upsert_skill_trust( + &self, + skill_name: &str, + trust_level: &str, + source_kind: &str, + source_url: Option<&str>, + source_path: Option<&str>, + blake3_hash: &str, + ) -> Result<(), MemoryError> { + sqlx::query( + "INSERT INTO skill_trust (skill_name, trust_level, source_kind, source_url, source_path, blake3_hash, updated_at) \ + VALUES (?, ?, ?, ?, ?, ?, datetime('now')) \ + ON CONFLICT(skill_name) DO UPDATE SET \ + trust_level = excluded.trust_level, \ + source_kind = excluded.source_kind, \ + source_url = excluded.source_url, \ + source_path = excluded.source_path, \ + blake3_hash = excluded.blake3_hash, \ + updated_at = datetime('now')", + ) + .bind(skill_name) + .bind(trust_level) + .bind(source_kind) + .bind(source_url) + .bind(source_path) + .bind(blake3_hash) + .execute(&self.pool) + .await?; + Ok(()) + } + + /// Load trust metadata for a single skill. + /// + /// # Errors + /// + /// Returns an error if the query fails. + pub async fn load_skill_trust( + &self, + skill_name: &str, + ) -> Result, MemoryError> { + let row: Option = sqlx::query_as( + "SELECT skill_name, trust_level, source_kind, source_url, source_path, blake3_hash, updated_at \ + FROM skill_trust WHERE skill_name = ?", + ) + .bind(skill_name) + .fetch_optional(&self.pool) + .await?; + Ok(row.map(row_from_tuple)) + } + + /// Load all skill trust entries. + /// + /// # Errors + /// + /// Returns an error if the query fails. 
+ pub async fn load_all_skill_trust(&self) -> Result, MemoryError> { + let rows: Vec = sqlx::query_as( + "SELECT skill_name, trust_level, source_kind, source_url, source_path, blake3_hash, updated_at \ + FROM skill_trust ORDER BY skill_name", + ) + .fetch_all(&self.pool) + .await?; + Ok(rows.into_iter().map(row_from_tuple).collect()) + } + + /// Update only the trust level for a skill. + /// + /// # Errors + /// + /// Returns an error if the update fails; a skill with no trust row is not an error and yields `Ok(false)`. + pub async fn set_skill_trust_level( + &self, + skill_name: &str, + trust_level: &str, + ) -> Result { + let result = sqlx::query( + "UPDATE skill_trust SET trust_level = ?, updated_at = datetime('now') WHERE skill_name = ?", + ) + .bind(trust_level) + .bind(skill_name) + .execute(&self.pool) + .await?; + Ok(result.rows_affected() > 0) + } + + /// Delete trust entry for a skill. + /// + /// # Errors + /// + /// Returns an error if the delete fails. + pub async fn delete_skill_trust(&self, skill_name: &str) -> Result { + let result = sqlx::query("DELETE FROM skill_trust WHERE skill_name = ?") + .bind(skill_name) + .execute(&self.pool) + .await?; + Ok(result.rows_affected() > 0) + } + + /// Update the blake3 hash for a skill. + /// + /// # Errors + /// + /// Returns an error if the update fails. 
+ pub async fn update_skill_hash( + &self, + skill_name: &str, + blake3_hash: &str, + ) -> Result { + let result = sqlx::query( + "UPDATE skill_trust SET blake3_hash = ?, updated_at = datetime('now') WHERE skill_name = ?", + ) + .bind(blake3_hash) + .bind(skill_name) + .execute(&self.pool) + .await?; + Ok(result.rows_affected() > 0) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + async fn test_store() -> SqliteStore { + SqliteStore::new(":memory:").await.unwrap() + } + + #[tokio::test] + async fn upsert_and_load() { + let store = test_store().await; + + store + .upsert_skill_trust("git", "trusted", "local", None, None, "abc123") + .await + .unwrap(); + + let row = store.load_skill_trust("git").await.unwrap().unwrap(); + assert_eq!(row.skill_name, "git"); + assert_eq!(row.trust_level, "trusted"); + assert_eq!(row.source_kind, "local"); + assert_eq!(row.blake3_hash, "abc123"); + } + + #[tokio::test] + async fn upsert_updates_existing() { + let store = test_store().await; + + store + .upsert_skill_trust("git", "quarantined", "local", None, None, "hash1") + .await + .unwrap(); + store + .upsert_skill_trust("git", "trusted", "local", None, None, "hash2") + .await + .unwrap(); + + let row = store.load_skill_trust("git").await.unwrap().unwrap(); + assert_eq!(row.trust_level, "trusted"); + assert_eq!(row.blake3_hash, "hash2"); + } + + #[tokio::test] + async fn load_nonexistent() { + let store = test_store().await; + let row = store.load_skill_trust("nope").await.unwrap(); + assert!(row.is_none()); + } + + #[tokio::test] + async fn load_all() { + let store = test_store().await; + + store + .upsert_skill_trust("alpha", "trusted", "local", None, None, "h1") + .await + .unwrap(); + store + .upsert_skill_trust( + "beta", + "quarantined", + "hub", + Some("https://hub.example.com"), + None, + "h2", + ) + .await + .unwrap(); + + let rows = store.load_all_skill_trust().await.unwrap(); + assert_eq!(rows.len(), 2); + assert_eq!(rows[0].skill_name, "alpha"); + 
assert_eq!(rows[1].skill_name, "beta"); + } + + #[tokio::test] + async fn set_trust_level() { + let store = test_store().await; + + store + .upsert_skill_trust("git", "quarantined", "local", None, None, "h1") + .await + .unwrap(); + + let updated = store.set_skill_trust_level("git", "blocked").await.unwrap(); + assert!(updated); + + let row = store.load_skill_trust("git").await.unwrap().unwrap(); + assert_eq!(row.trust_level, "blocked"); + } + + #[tokio::test] + async fn set_trust_level_nonexistent() { + let store = test_store().await; + let updated = store + .set_skill_trust_level("nope", "blocked") + .await + .unwrap(); + assert!(!updated); + } + + #[tokio::test] + async fn delete_trust() { + let store = test_store().await; + + store + .upsert_skill_trust("git", "trusted", "local", None, None, "h1") + .await + .unwrap(); + + let deleted = store.delete_skill_trust("git").await.unwrap(); + assert!(deleted); + + let row = store.load_skill_trust("git").await.unwrap(); + assert!(row.is_none()); + } + + #[tokio::test] + async fn delete_nonexistent() { + let store = test_store().await; + let deleted = store.delete_skill_trust("nope").await.unwrap(); + assert!(!deleted); + } + + #[tokio::test] + async fn update_hash() { + let store = test_store().await; + + store + .upsert_skill_trust("git", "verified", "local", None, None, "old_hash") + .await + .unwrap(); + + let updated = store.update_skill_hash("git", "new_hash").await.unwrap(); + assert!(updated); + + let row = store.load_skill_trust("git").await.unwrap().unwrap(); + assert_eq!(row.blake3_hash, "new_hash"); + } + + #[tokio::test] + async fn source_with_url() { + let store = test_store().await; + + store + .upsert_skill_trust( + "remote-skill", + "quarantined", + "hub", + Some("https://hub.example.com/skill"), + None, + "h1", + ) + .await + .unwrap(); + + let row = store + .load_skill_trust("remote-skill") + .await + .unwrap() + .unwrap(); + assert_eq!(row.source_kind, "hub"); + assert_eq!( + 
row.source_url.as_deref(), + Some("https://hub.example.com/skill") + ); + } + + #[tokio::test] + async fn source_with_path() { + let store = test_store().await; + + store + .upsert_skill_trust( + "file-skill", + "quarantined", + "file", + None, + Some("/tmp/skill.tar.gz"), + "h1", + ) + .await + .unwrap(); + + let row = store.load_skill_trust("file-skill").await.unwrap().unwrap(); + assert_eq!(row.source_kind, "file"); + assert_eq!(row.source_path.as_deref(), Some("/tmp/skill.tar.gz")); + } +} diff --git a/crates/zeph-skills/Cargo.toml b/crates/zeph-skills/Cargo.toml index c9a54855..2899bbcb 100644 --- a/crates/zeph-skills/Cargo.toml +++ b/crates/zeph-skills/Cargo.toml @@ -8,15 +8,16 @@ repository.workspace = true [features] default = [] -qdrant = ["dep:blake3", "dep:qdrant-client", "dep:serde_json", "dep:uuid", "dep:zeph-memory"] +qdrant = ["dep:qdrant-client", "dep:uuid", "dep:zeph-memory"] self-learning = [] [dependencies] -blake3 = { workspace = true, optional = true } +blake3.workspace = true notify.workspace = true notify-debouncer-mini.workspace = true qdrant-client = { workspace = true, optional = true, features = ["serde"] } -serde_json = { workspace = true, optional = true } +serde = { workspace = true, features = ["derive"] } +serde_json.workspace = true futures.workspace = true thiserror.workspace = true tokio = { workspace = true, features = ["sync", "rt", "time"] } diff --git a/crates/zeph-skills/src/lib.rs b/crates/zeph-skills/src/lib.rs index 1018894c..de8e48f2 100644 --- a/crates/zeph-skills/src/lib.rs +++ b/crates/zeph-skills/src/lib.rs @@ -10,6 +10,8 @@ pub mod prompt; pub mod qdrant_matcher; pub mod registry; pub(crate) mod resource; +pub mod trust; pub mod watcher; pub use error::SkillError; +pub use trust::{SkillSource, SkillTrust, TrustLevel, compute_skill_hash}; diff --git a/crates/zeph-skills/src/prompt.rs b/crates/zeph-skills/src/prompt.rs index df648b7c..3f50ea64 100644 --- a/crates/zeph-skills/src/prompt.rs +++ 
b/crates/zeph-skills/src/prompt.rs @@ -1,7 +1,9 @@ +use std::collections::HashMap; use std::fmt::Write; use crate::loader::Skill; use crate::resource::discover_resources; +use crate::trust::TrustLevel; const OS_NAMES: &[&str] = &["linux", "macos", "windows"]; @@ -15,7 +17,11 @@ fn should_include_reference(filename: &str, os_family: &str) -> bool { } #[must_use] -pub fn format_skills_prompt(skills: &[Skill], os_family: &str) -> String { +pub fn format_skills_prompt( + skills: &[Skill], + os_family: &str, + trust_levels: &HashMap, +) -> String { if skills.is_empty() { return String::new(); } @@ -23,12 +29,21 @@ pub fn format_skills_prompt(skills: &[Skill], os_family: &str) -> String { let mut out = String::from("\n"); for skill in skills { + let trust = trust_levels + .get(skill.name()) + .copied() + .unwrap_or(TrustLevel::Trusted); + let body = if trust == TrustLevel::Quarantined { + wrap_quarantined(skill.name(), &skill.body) + } else { + skill.body.clone() + }; let _ = write!( out, " \n {}\n \n{}", skill.name(), skill.description(), - skill.body, + body, ); let resources = discover_resources(&skill.meta.skill_dir); @@ -54,6 +69,15 @@ pub fn format_skills_prompt(skills: &[Skill], os_family: &str) -> String { out } +/// Wrap a quarantined skill's prompt with warning markers. +#[must_use] +pub fn wrap_quarantined(skill_name: &str, body: &str) -> String { + format!( + "[QUARANTINED SKILL: {skill_name}] The following skill is quarantined. 
\ + It has restricted tool access (no bash, file_write, web_scrape).\n\n{body}" + ) +} + #[must_use] pub fn format_skills_catalog(skills: &[Skill]) -> String { if skills.is_empty() { @@ -113,14 +137,14 @@ mod tests { #[test] fn empty_skills_returns_empty_string() { let empty: &[Skill] = &[]; - assert_eq!(format_skills_prompt(empty, "linux"), ""); + assert_eq!(format_skills_prompt(empty, "linux", &HashMap::new()), ""); } #[test] fn single_skill_format() { let skills = vec![make_skill("test", "A test.", "# Hello\nworld")]; - let output = format_skills_prompt(&skills, "linux"); + let output = format_skills_prompt(&skills, "linux", &HashMap::new()); assert!(output.starts_with("")); assert!(output.ends_with("")); assert!(output.contains("")); @@ -135,7 +159,7 @@ mod tests { make_skill("b", "desc b", "body b"), ]; - let output = format_skills_prompt(&skills, "linux"); + let output = format_skills_prompt(&skills, "linux", &HashMap::new()); assert!(output.contains("")); assert!(output.contains("")); } @@ -176,7 +200,7 @@ mod tests { dir.path().to_path_buf(), )]; - let output = format_skills_prompt(&skills, "linux"); + let output = format_skills_prompt(&skills, "linux", &HashMap::new()); assert!(output.contains("# Linux commands")); assert!(!output.contains("# macOS commands")); assert!(output.contains("# Common docs")); @@ -194,11 +218,31 @@ mod tests { dir.path().to_path_buf(), )]; - let output = format_skills_prompt(&skills, "macos"); + let output = format_skills_prompt(&skills, "macos", &HashMap::new()); assert!(output.contains("skill body")); assert!(!output.contains(" u8 { + match self { + Self::Trusted => 0, + Self::Verified => 1, + Self::Quarantined => 2, + Self::Blocked => 3, + } + } + + /// Returns the least-trusted (highest severity) of two levels. 
+ #[must_use] + pub fn min_trust(self, other: Self) -> Self { + if self.severity() >= other.severity() { + self + } else { + other + } + } + + #[must_use] + pub fn is_active(self) -> bool { + !matches!(self, Self::Blocked) + } +} + +impl fmt::Display for TrustLevel { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Trusted => f.write_str("trusted"), + Self::Verified => f.write_str("verified"), + Self::Quarantined => f.write_str("quarantined"), + Self::Blocked => f.write_str("blocked"), + } + } +} + +/// Where a skill was loaded from. +#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "lowercase")] +pub enum SkillSource { + /// Built-in skill shipped with the binary. + #[default] + Local, + /// Downloaded from a skill hub. + Hub { url: String }, + /// Imported from a local file path. + File { path: PathBuf }, +} + +impl fmt::Display for SkillSource { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Local => f.write_str("local"), + Self::Hub { url } => write!(f, "hub({url})"), + Self::File { path } => write!(f, "file({})", path.display()), + } + } +} + +/// Trust metadata attached to a loaded skill. +#[derive(Debug, Clone)] +pub struct SkillTrust { + pub skill_name: String, + pub trust_level: TrustLevel, + pub source: SkillSource, + pub blake3_hash: String, +} + +/// Compute blake3 hash of a SKILL.md file. +/// +/// # Errors +/// +/// Returns an IO error if the file cannot be read. 
+pub fn compute_skill_hash(skill_dir: &Path) -> std::io::Result<String> {
+    let content = std::fs::read(skill_dir.join("SKILL.md"))?; // raw bytes, hashed exactly as stored
+    Ok(blake3::hash(&content).to_hex().to_string())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn severity_ordering() {
+        assert!(TrustLevel::Trusted.severity() < TrustLevel::Verified.severity());
+        assert!(TrustLevel::Verified.severity() < TrustLevel::Quarantined.severity());
+        assert!(TrustLevel::Quarantined.severity() < TrustLevel::Blocked.severity());
+    }
+
+    #[test]
+    fn min_trust_picks_least_trusted() {
+        assert_eq!(
+            TrustLevel::Trusted.min_trust(TrustLevel::Quarantined),
+            TrustLevel::Quarantined
+        );
+        assert_eq!(
+            TrustLevel::Blocked.min_trust(TrustLevel::Trusted),
+            TrustLevel::Blocked
+        );
+    }
+
+    #[test]
+    fn is_active() {
+        assert!(TrustLevel::Trusted.is_active());
+        assert!(TrustLevel::Verified.is_active());
+        assert!(TrustLevel::Quarantined.is_active());
+        assert!(!TrustLevel::Blocked.is_active());
+    }
+
+    #[test]
+    fn default_is_quarantined() {
+        assert_eq!(TrustLevel::default(), TrustLevel::Quarantined);
+    }
+
+    #[test]
+    fn display() {
+        assert_eq!(TrustLevel::Trusted.to_string(), "trusted");
+        assert_eq!(TrustLevel::Blocked.to_string(), "blocked");
+        assert_eq!(SkillSource::Local.to_string(), "local");
+        assert_eq!(
+            SkillSource::Hub {
+                url: "https://example.com".into()
+            }
+            .to_string(),
+            "hub(https://example.com)"
+        );
+    }
+
+    #[test]
+    fn serde_roundtrip() {
+        let level = TrustLevel::Quarantined;
+        let json = serde_json::to_string(&level).unwrap();
+        assert_eq!(json, "\"quarantined\"");
+        let back: TrustLevel = serde_json::from_str(&json).unwrap();
+        assert_eq!(back, level);
+    }
+
+    #[test]
+    fn compute_hash() {
+        let dir = tempfile::tempdir().unwrap();
+        std::fs::write(dir.path().join("SKILL.md"), "test content").unwrap();
+        let hash = compute_skill_hash(dir.path()).unwrap();
+        assert_eq!(hash.len(), 64); // blake3 hex is 64 chars
+        // Same content = same hash
+        let hash2 = compute_skill_hash(dir.path()).unwrap();
+        assert_eq!(hash, hash2);
+    }
+
+    #[test]
+    fn compute_hash_different_content() {
+        let dir1 = tempfile::tempdir().unwrap();
+        let dir2 = tempfile::tempdir().unwrap();
+        std::fs::write(dir1.path().join("SKILL.md"), "content a").unwrap();
+        std::fs::write(dir2.path().join("SKILL.md"), "content b").unwrap();
+        let h1 = compute_skill_hash(dir1.path()).unwrap();
+        let h2 = compute_skill_hash(dir2.path()).unwrap();
+        assert_ne!(h1, h2);
+    }
+
+    #[test]
+    fn source_serde_roundtrip() {
+        let source = SkillSource::Hub {
+            url: "https://hub.example.com/skill".into(),
+        };
+        let json = serde_json::to_string(&source).unwrap();
+        let back: SkillSource = serde_json::from_str(&json).unwrap();
+        assert_eq!(back, source);
+    }
+}
diff --git a/crates/zeph-tools/Cargo.toml b/crates/zeph-tools/Cargo.toml
index a838998a..1514a5f3 100644
--- a/crates/zeph-tools/Cargo.toml
+++ b/crates/zeph-tools/Cargo.toml
@@ -20,6 +20,7 @@ thiserror.workspace = true
 tokio = { workspace = true, features = ["fs", "io-util", "macros", "process", "rt", "sync", "time"] }
 tracing.workspace = true
 url.workspace = true
+zeph-skills.workspace = true
 
 [dev-dependencies]
 filetime = "0.2"
diff --git a/crates/zeph-tools/src/anomaly.rs b/crates/zeph-tools/src/anomaly.rs
new file mode 100644
index 00000000..39724813
--- /dev/null
+++ b/crates/zeph-tools/src/anomaly.rs
@@ -0,0 +1,211 @@
+//! Sliding-window anomaly detection for tool execution patterns.
+
+use std::collections::VecDeque;
+
+/// Severity of a detected anomaly.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum AnomalySeverity {
+    Warning,
+    Critical,
+}
+
+/// A detected anomaly in tool execution patterns.
+#[derive(Debug, Clone)]
+pub struct Anomaly {
+    pub severity: AnomalySeverity,
+    pub description: String,
+}
+
+/// Tracks recent tool execution outcomes and detects anomalous patterns.
+#[derive(Debug)]
+pub struct AnomalyDetector {
+    window: VecDeque<Outcome>, // oldest outcome at the front
+    window_size: usize,
+    error_threshold: f64,
+    critical_threshold: f64,
+}
+
+#[derive(Debug, Clone, Copy)]
+enum Outcome {
+    Success,
+    Error,
+    Blocked,
+}
+
+impl AnomalyDetector {
+    #[must_use]
+    pub fn new(window_size: usize, error_threshold: f64, critical_threshold: f64) -> Self {
+        Self {
+            window: VecDeque::with_capacity(window_size),
+            window_size,
+            error_threshold,
+            critical_threshold,
+        }
+    }
+
+    /// Record a successful tool execution.
+    pub fn record_success(&mut self) {
+        self.push(Outcome::Success);
+    }
+
+    /// Record a failed tool execution.
+    pub fn record_error(&mut self) {
+        self.push(Outcome::Error);
+    }
+
+    /// Record a blocked tool execution (counted together with errors by `check`).
+    pub fn record_blocked(&mut self) {
+        self.push(Outcome::Blocked);
+    }
+
+    fn push(&mut self, outcome: Outcome) {
+        if self.window.len() >= self.window_size { // full: evict the oldest outcome first
+            self.window.pop_front();
+        }
+        self.window.push_back(outcome);
+    }
+
+    /// Check the current window; returns `None` until at least 3 outcomes are recorded.
+    #[must_use]
+    #[allow(clippy::cast_precision_loss)]
+    pub fn check(&self) -> Option<Anomaly> {
+        if self.window.len() < 3 {
+            return None;
+        }
+
+        let total = self.window.len();
+        let errors = self
+            .window
+            .iter()
+            .filter(|o| matches!(o, Outcome::Error | Outcome::Blocked))
+            .count();
+
+        let ratio = errors as f64 / total as f64;
+
+        if ratio >= self.critical_threshold { // critical is tested first, so it wins over warning
+            Some(Anomaly {
+                severity: AnomalySeverity::Critical,
+                description: format!(
+                    "error rate {:.0}% ({errors}/{total}) exceeds critical threshold",
+                    ratio * 100.0,
+                ),
+            })
+        } else if ratio >= self.error_threshold {
+            Some(Anomaly {
+                severity: AnomalySeverity::Warning,
+                description: format!(
+                    "error rate {:.0}% ({errors}/{total}) exceeds warning threshold",
+                    ratio * 100.0,
+                ),
+            })
+        } else {
+            None
+        }
+    }
+
+    /// Reset the sliding window.
+    pub fn reset(&mut self) {
+        self.window.clear();
+    }
+}
+
+impl Default for AnomalyDetector {
+    fn default() -> Self {
+        Self::new(10, 0.5, 0.8) // 10 outcomes, warning at 50%, critical at 80%
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn no_anomaly_on_success() {
+        let mut det = AnomalyDetector::default();
+        for _ in 0..10 {
+            det.record_success();
+        }
+        assert!(det.check().is_none());
+    }
+
+    #[test]
+    fn warning_on_half_errors() {
+        let mut det = AnomalyDetector::new(10, 0.5, 0.8);
+        for _ in 0..5 {
+            det.record_success();
+        }
+        for _ in 0..5 {
+            det.record_error();
+        }
+        let anomaly = det.check().unwrap();
+        assert_eq!(anomaly.severity, AnomalySeverity::Warning);
+    }
+
+    #[test]
+    fn critical_on_high_errors() {
+        let mut det = AnomalyDetector::new(10, 0.5, 0.8);
+        for _ in 0..2 {
+            det.record_success();
+        }
+        for _ in 0..8 {
+            det.record_error();
+        }
+        let anomaly = det.check().unwrap();
+        assert_eq!(anomaly.severity, AnomalySeverity::Critical);
+    }
+
+    #[test]
+    fn blocked_counts_as_error() {
+        let mut det = AnomalyDetector::new(10, 0.5, 0.8);
+        for _ in 0..2 {
+            det.record_success();
+        }
+        for _ in 0..8 {
+            det.record_blocked();
+        }
+        let anomaly = det.check().unwrap();
+        assert_eq!(anomaly.severity, AnomalySeverity::Critical);
+    }
+
+    #[test]
+    fn window_slides() {
+        let mut det = AnomalyDetector::new(5, 0.5, 0.8);
+        for _ in 0..5 {
+            det.record_error();
+        }
+        assert!(det.check().is_some());
+
+        // Push 5 successes to slide out errors
+        for _ in 0..5 {
+            det.record_success();
+        }
+        assert!(det.check().is_none());
+    }
+
+    #[test]
+    fn too_few_samples_returns_none() {
+        let mut det = AnomalyDetector::default();
+        det.record_error();
+        det.record_error();
+        assert!(det.check().is_none());
+    }
+
+    #[test]
+    fn reset_clears_window() {
+        let mut det = AnomalyDetector::new(5, 0.5, 0.8);
+        for _ in 0..5 {
+            det.record_error();
+        }
+        assert!(det.check().is_some());
+        det.reset();
+        assert!(det.check().is_none());
+    }
+
+    #[test]
+    fn default_thresholds() {
+        let det = AnomalyDetector::default();
+        assert_eq!(det.window_size, 10);
+        assert!((det.error_threshold - 0.5).abs() < f64::EPSILON);
+        assert!((det.critical_threshold - 0.8).abs() < f64::EPSILON);
+    }
+}
diff --git a/crates/zeph-tools/src/lib.rs b/crates/zeph-tools/src/lib.rs
index 7499343e..082e5ea2 100644
--- a/crates/zeph-tools/src/lib.rs
+++ b/crates/zeph-tools/src/lib.rs
@@ -1,5 +1,6 @@
 //! Tool execution abstraction and shell backend.
 
+pub mod anomaly;
 pub mod audit;
 pub mod composite;
 pub mod config;
@@ -10,7 +11,9 @@ pub mod permissions;
 pub mod registry;
 pub mod scrape;
 pub mod shell;
+pub mod trust_gate;
 
+pub use anomaly::{AnomalyDetector, AnomalySeverity};
 pub use audit::{AuditEntry, AuditLogger, AuditResult};
 pub use composite::CompositeExecutor;
 pub use config::{AuditConfig, ScrapeConfig, ShellConfig, ToolsConfig};
@@ -26,3 +29,4 @@ pub use permissions::{
 pub use registry::ToolRegistry;
 pub use scrape::WebScrapeExecutor;
 pub use shell::ShellExecutor;
+pub use trust_gate::TrustGateExecutor;
diff --git a/crates/zeph-tools/src/trust_gate.rs b/crates/zeph-tools/src/trust_gate.rs
new file mode 100644
index 00000000..017c0b73
--- /dev/null
+++ b/crates/zeph-tools/src/trust_gate.rs
@@ -0,0 +1,246 @@
+//! Trust-level enforcement layer for tool execution.
+
+use zeph_skills::TrustLevel;
+
+use crate::executor::{ToolCall, ToolError, ToolExecutor, ToolOutput};
+use crate::permissions::{PermissionAction, PermissionPolicy};
+use crate::registry::ToolDef;
+
+/// Tools denied when a Quarantined skill is active.
+const QUARANTINE_DENIED: &[&str] = &["bash", "file_write", "web_scrape"];
+
+/// Wraps an inner `ToolExecutor` and applies trust-level permission overlays.
+#[derive(Debug)]
+pub struct TrustGateExecutor<T> {
+    inner: T,
+    policy: PermissionPolicy,
+    effective_trust: TrustLevel,
+}
+
+impl<T: ToolExecutor> TrustGateExecutor<T> {
+    #[must_use]
+    pub fn new(inner: T, policy: PermissionPolicy) -> Self {
+        Self {
+            inner,
+            policy,
+            effective_trust: TrustLevel::Trusted, // stays Trusted until `set_effective_trust` is called
+        }
+    }
+
+    pub fn set_effective_trust(&mut self, level: TrustLevel) {
+        self.effective_trust = level;
+    }
+
+    #[must_use]
+    pub fn effective_trust(&self) -> TrustLevel {
+        self.effective_trust
+    }
+
+    fn check_trust(&self, tool_id: &str, input: &str) -> Result<(), ToolError> {
+        match self.effective_trust { // the trust overlay runs before the permission policy
+            TrustLevel::Blocked => {
+                return Err(ToolError::Blocked {
+                    command: "all tools blocked (trust=blocked)".to_owned(),
+                });
+            }
+            TrustLevel::Quarantined if QUARANTINE_DENIED.contains(&tool_id) => {
+                return Err(ToolError::Blocked {
+                    command: format!("{tool_id} denied (trust=quarantined)"),
+                });
+            }
+            TrustLevel::Trusted | TrustLevel::Verified | TrustLevel::Quarantined => {}
+        }
+
+        match self.policy.check(tool_id, input) {
+            PermissionAction::Allow => Ok(()),
+            PermissionAction::Ask => Err(ToolError::ConfirmationRequired {
+                command: input.to_owned(),
+            }),
+            PermissionAction::Deny => Err(ToolError::Blocked {
+                command: input.to_owned(),
+            }),
+        }
+    }
+}
+
+impl<T: ToolExecutor> ToolExecutor for TrustGateExecutor<T> {
+    async fn execute(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
+        // A raw response carries no tool id, so quarantine cannot be applied per tool here.
+        if matches!(self.effective_trust, TrustLevel::Blocked | TrustLevel::Quarantined) {
+            return Err(ToolError::Blocked {
+                command: format!("all tools blocked (trust={})", self.effective_trust),
+            });
+        }
+        self.inner.execute(response).await
+    }
+
+    async fn execute_confirmed(&self, response: &str) -> Result<Option<ToolOutput>, ToolError> {
+        // Same reasoning as `execute`: a confirmation must not lift the quarantine.
+        if matches!(self.effective_trust, TrustLevel::Blocked | TrustLevel::Quarantined) {
+            return Err(ToolError::Blocked {
+                command: format!("all tools blocked (trust={})", self.effective_trust),
+            });
+        }
+        self.inner.execute_confirmed(response).await
+    }
+
+    fn tool_definitions(&self) -> Vec<ToolDef> {
+        self.inner.tool_definitions()
+    }
+
+    async fn execute_tool_call(&self, call: &ToolCall) -> Result<Option<ToolOutput>, ToolError> {
+        let input = call
+            .params
+            .get("command")
+            .and_then(|v| v.as_str())
+            .unwrap_or(""); // calls without a string `command` are policy-checked with empty input
+        self.check_trust(&call.tool_id, input)?;
+        self.inner.execute_tool_call(call).await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::collections::HashMap;
+
+    #[derive(Debug)]
+    struct MockExecutor;
+    impl ToolExecutor for MockExecutor {
+        async fn execute(&self, _: &str) -> Result<Option<ToolOutput>, ToolError> {
+            Ok(None)
+        }
+        async fn execute_tool_call(
+            &self,
+            call: &ToolCall,
+        ) -> Result<Option<ToolOutput>, ToolError> {
+            Ok(Some(ToolOutput {
+                tool_name: call.tool_id.clone(),
+                summary: "ok".into(),
+                blocks_executed: 1,
+            }))
+        }
+    }
+
+    fn make_call(tool_id: &str) -> ToolCall {
+        ToolCall {
+            tool_id: tool_id.into(),
+            params: HashMap::new(),
+        }
+    }
+
+    fn make_call_with_cmd(tool_id: &str, cmd: &str) -> ToolCall {
+        let mut params = HashMap::new();
+        params.insert("command".into(), serde_json::Value::String(cmd.into()));
+        ToolCall {
+            tool_id: tool_id.into(),
+            params,
+        }
+    }
+
+    #[tokio::test]
+    async fn trusted_allows_all() {
+        let mut gate = TrustGateExecutor::new(MockExecutor, PermissionPolicy::default());
+        gate.set_effective_trust(TrustLevel::Trusted);
+
+        let result = gate.execute_tool_call(&make_call("bash")).await;
+        // Default policy returns Ask for unknown tools
+        assert!(matches!(
+            result,
+            Err(ToolError::ConfirmationRequired { .. })
+        ));
+    }
+
+    #[tokio::test]
+    async fn quarantined_denies_bash() {
+        let mut gate = TrustGateExecutor::new(MockExecutor, PermissionPolicy::default());
+        gate.set_effective_trust(TrustLevel::Quarantined);
+
+        let result = gate.execute_tool_call(&make_call("bash")).await;
+        assert!(matches!(result, Err(ToolError::Blocked { .. })));
+    }
+
+    #[tokio::test]
+    async fn quarantined_denies_file_write() {
+        let mut gate = TrustGateExecutor::new(MockExecutor, PermissionPolicy::default());
+        gate.set_effective_trust(TrustLevel::Quarantined);
+
+        let result = gate.execute_tool_call(&make_call("file_write")).await;
+        assert!(matches!(result, Err(ToolError::Blocked { .. })));
+    }
+
+    #[tokio::test]
+    async fn quarantined_allows_file_read() {
+        let policy = crate::permissions::PermissionPolicy::from_legacy(&[], &[]);
+        let mut gate = TrustGateExecutor::new(MockExecutor, policy);
+        gate.set_effective_trust(TrustLevel::Quarantined);
+
+        let result = gate.execute_tool_call(&make_call("file_read")).await;
+        // file_read is not in quarantine denied list, and policy has no rules for file_read => Ask
+        assert!(matches!(
+            result,
+            Err(ToolError::ConfirmationRequired { .. })
+        ));
+    }
+
+    #[tokio::test]
+    async fn blocked_denies_everything() {
+        let mut gate = TrustGateExecutor::new(MockExecutor, PermissionPolicy::default());
+        gate.set_effective_trust(TrustLevel::Blocked);
+
+        let result = gate.execute_tool_call(&make_call("file_read")).await;
+        assert!(matches!(result, Err(ToolError::Blocked { .. })));
+    }
+
+    #[tokio::test]
+    async fn policy_deny_overrides_trust() {
+        let policy = crate::permissions::PermissionPolicy::from_legacy(&["sudo".into()], &[]);
+        let mut gate = TrustGateExecutor::new(MockExecutor, policy);
+        gate.set_effective_trust(TrustLevel::Trusted);
+
+        let result = gate
+            .execute_tool_call(&make_call_with_cmd("bash", "sudo rm"))
+            .await;
+        assert!(matches!(result, Err(ToolError::Blocked { .. })));
+    }
+
+    #[tokio::test]
+    async fn blocked_denies_execute() {
+        let mut gate = TrustGateExecutor::new(MockExecutor, PermissionPolicy::default());
+        gate.set_effective_trust(TrustLevel::Blocked);
+
+        let result = gate.execute("some response").await;
+        assert!(matches!(result, Err(ToolError::Blocked { .. })));
+    }
+
+    #[tokio::test]
+    async fn blocked_denies_execute_confirmed() {
+        let mut gate = TrustGateExecutor::new(MockExecutor, PermissionPolicy::default());
+        gate.set_effective_trust(TrustLevel::Blocked);
+
+        let result = gate.execute_confirmed("some response").await;
+        assert!(matches!(result, Err(ToolError::Blocked { .. })));
+    }
+
+    #[tokio::test]
+    async fn trusted_allows_execute() {
+        let mut gate = TrustGateExecutor::new(MockExecutor, PermissionPolicy::default());
+        gate.set_effective_trust(TrustLevel::Trusted);
+
+        let result = gate.execute("some response").await;
+        assert!(result.is_ok());
+    }
+
+    #[tokio::test]
+    async fn verified_with_allow_policy_succeeds() {
+        let policy = crate::permissions::PermissionPolicy::from_legacy(&[], &[]);
+        let mut gate = TrustGateExecutor::new(MockExecutor, policy);
+        gate.set_effective_trust(TrustLevel::Verified);
+
+        let result = gate
+            .execute_tool_call(&make_call_with_cmd("bash", "echo hi"))
+            .await
+            .unwrap();
+        assert!(result.is_some());
+    }
+}
diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md
index d30db9eb..6c3e9bfe 100644
--- a/docs/src/SUMMARY.md
+++ b/docs/src/SUMMARY.md
@@ -20,6 +20,7 @@
 - [Local Inference (Candle)](guide/candle.md)
 - [Model Orchestrator](guide/orchestrator.md)
 - [Self-Learning Skills](guide/self-learning.md)
+- [Skill Trust Levels](guide/skill-trust.md)
 - [A2A Protocol](guide/a2a.md)
 - [Secrets Management](guide/vault.md)
 - [Channels (CLI, Telegram, TUI)](guide/channels.md)
diff --git a/docs/src/guide/skill-trust.md b/docs/src/guide/skill-trust.md
new file mode 100644
index 00000000..f7e9f818
--- /dev/null
+++ b/docs/src/guide/skill-trust.md
@@ -0,0 +1,74 @@
+# Skill Trust Levels
+
+Zeph assigns a trust level to every loaded skill, controlling which tools it can invoke. This prevents untrusted or tampered skills from executing dangerous operations like shell commands or file writes.
+
+## Trust Tiers
+
+| Level | Tool Access | Description |
+|-------|-------------|-------------|
+| **Trusted** | Full | Built-in or user-audited skills. No restrictions. |
+| **Verified** | Full | Hash-verified skills. Default tool access applies. |
+| **Quarantined** | Restricted | Newly imported or hash-mismatch skills. `bash`, `file_write`, and `web_scrape` are denied. |
+| **Blocked** | None | Explicitly disabled. All tool calls are rejected. |
+
+The default trust level for newly discovered skills is `quarantined`. Local (built-in) skills default to `trusted`.
+
+## Integrity Verification
+
+Each skill's `SKILL.md` content is hashed with BLAKE3 on load. The hash is stored in SQLite alongside the skill's trust level and source metadata. On hot-reload, the new hash is compared against the stored value. If a mismatch is detected, the skill is downgraded to the configured `hash_mismatch_level` (default: `quarantined`).
+
+## Quarantine Enforcement
+
+When a quarantined skill is active, `TrustGateExecutor` intercepts tool calls and blocks access to `bash`, `file_write`, and `web_scrape`. Other tools (e.g., `file_read`) remain subject to the normal permission policy.
+
+Quarantined skill bodies are also wrapped with a structural prefix in the system prompt, making the LLM aware of the restriction:
+
+```
+[QUARANTINED SKILL: <name>] The following skill is quarantined.
+It has restricted tool access (no bash, file_write, web_scrape).
+```
+
+## Anomaly Detection
+
+An `AnomalyDetector` tracks tool execution outcomes in a sliding window (default: 10 events). If the error/blocked ratio reaches or exceeds a configurable threshold, an anomaly is reported:
+
+| Threshold | Default | Severity |
+|-----------|---------|----------|
+| Warning | 50% | Logged as warning |
+| Critical | 80% | May trigger auto-block |
+
+The detector requires at least 3 events before producing a result.
+
+## Self-Learning Gate
+
+Skills with trust level below `Verified` are excluded from self-learning improvement. This prevents the LLM from generating improved versions of untrusted skill content.
+
+## CLI Commands
+
+| Command | Description |
+|---------|-------------|
+| `/skill trust` | List all skills with their trust level, source, and hash |
+| `/skill trust <name>` | Show trust details for a specific skill |
+| `/skill trust <name> <level>` | Set trust level (`trusted`, `verified`, `quarantined`, `blocked`) |
+| `/skill block <name>` | Block a skill (all tool access denied) |
+| `/skill unblock <name>` | Unblock a skill (reverts to `quarantined`) |
+
+## Configuration
+
+```toml
+[skills.trust]
+# Trust level for newly discovered skills
+default_level = "quarantined"
+# Trust level for local (built-in) skills
+local_level = "trusted"
+# Trust level assigned after BLAKE3 hash mismatch on hot-reload
+hash_mismatch_level = "quarantined"
+```
+
+Environment variable overrides:
+
+```bash
+export ZEPH_SKILLS_TRUST_DEFAULT_LEVEL=quarantined
+export ZEPH_SKILLS_TRUST_LOCAL_LEVEL=trusted
+export ZEPH_SKILLS_TRUST_HASH_MISMATCH_LEVEL=quarantined
+```
diff --git a/docs/src/guide/skills.md b/docs/src/guide/skills.md
index 782fa043..d905aafa 100644
--- a/docs/src/guide/skills.md
+++ b/docs/src/guide/skills.md
@@ -132,8 +132,12 @@ Only metadata (~100 tokens per skill) is loaded at startup for embedding and mat
 
 With 50+ skills installed, a typical prompt still contains only 5 — saving thousands of tokens per request compared to naive full-injection approaches.
 
+## Trust Levels
+
+Every skill is assigned a trust level (`trusted`, `verified`, `quarantined`, `blocked`) that controls which tools it can invoke. Local skills default to `trusted`; newly imported or hash-mismatch skills start as `quarantined` with restricted tool access. See [Skill Trust Levels](skill-trust.md) for details.
+
 ## Hot Reload
 
-SKILL.md file changes are detected via filesystem watcher (500ms debounce) and re-embedded without restart. Cached bodies are invalidated on reload.
+SKILL.md file changes are detected via filesystem watcher (500ms debounce) and re-embedded without restart. Cached bodies are invalidated on reload. If the BLAKE3 content hash changes, the skill's trust level may be downgraded according to the `hash_mismatch_level` configuration.
 
 With the Qdrant backend, hot-reload triggers a delta sync — only modified skills are re-embedded and updated in the collection.
diff --git a/migrations/009_skill_trust.sql b/migrations/009_skill_trust.sql
new file mode 100644
index 00000000..d8984a47
--- /dev/null
+++ b/migrations/009_skill_trust.sql
@@ -0,0 +1,9 @@
+CREATE TABLE IF NOT EXISTS skill_trust ( -- one row per skill, keyed by name
+    skill_name TEXT PRIMARY KEY NOT NULL,
+    trust_level TEXT NOT NULL DEFAULT 'quarantined', -- NOTE(review): presumably a TrustLevel name (trusted/verified/quarantined/blocked) — confirm
+    source_kind TEXT NOT NULL DEFAULT 'local', -- NOTE(review): presumably the SkillSource kind tag (local/hub/file) — confirm
+    source_url TEXT, -- nullable
+    source_path TEXT, -- nullable
+    blake3_hash TEXT NOT NULL,
+    updated_at TEXT NOT NULL DEFAULT (datetime('now')) -- defaults to SQLite's current UTC timestamp text
+);