From 9f04ce0ebc8fe068dc14a5687b37991cfda38d36 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sat, 14 Feb 2026 13:08:47 +0100 Subject: [PATCH 1/2] feat(tools): add tool registry, file executor, and doom-loop detection (#239, #240, #241, #242, #243, #244, #245) Introduce structured tool definitions via ToolRegistry with 7 built-in tools, sandboxed FileExecutor for file operations, and CompositeExecutor routing by tool_id. Raise iteration cap from 3 to configurable 10 with doom-loop detection (3 consecutive identical outputs) and context budget check at 80% threshold. --- CHANGELOG.md | 14 +- Cargo.lock | 2 + Cargo.toml | 2 + config/default.toml | 2 + crates/zeph-core/src/agent.rs | 105 ++++++- crates/zeph-core/src/config.rs | 7 + crates/zeph-core/src/context.rs | 23 +- crates/zeph-tools/Cargo.toml | 2 + crates/zeph-tools/src/composite.rs | 93 +++++- crates/zeph-tools/src/executor.rs | 40 +++ crates/zeph-tools/src/file.rs | 478 +++++++++++++++++++++++++++++ crates/zeph-tools/src/lib.rs | 6 +- crates/zeph-tools/src/registry.rs | 274 +++++++++++++++++ src/main.rs | 1 + 14 files changed, 1030 insertions(+), 19 deletions(-) create mode 100644 crates/zeph-tools/src/file.rs create mode 100644 crates/zeph-tools/src/registry.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 870e96b6..d65887c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,17 +7,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
## [Unreleased] ### Added +- `ToolRegistry` with typed `ToolDef` definitions for 7 built-in tools (bash, read, edit, write, glob, grep, web_scrape) (#239) +- `FileExecutor` for sandboxed file operations: read, write, edit, glob, grep (#242) +- `ToolCall` struct and `execute_tool_call()` on `ToolExecutor` trait for structured tool invocation (#241) +- `CompositeExecutor` routes structured tool calls to correct sub-executor by tool_id (#243) +- Tool catalog section in system prompt via `ToolRegistry::format_for_prompt()` (#244) +- Configurable `max_tool_iterations` (default 10, previously hardcoded 3) via TOML and `ZEPH_AGENT_MAX_TOOL_ITERATIONS` env var (#245) +- Doom-loop detection: breaks agent loop on 3 consecutive identical tool outputs +- Context budget check at 80% threshold stops iteration before context overflow - `IndexWatcher` for incremental code index updates on file changes via `notify` file watcher (#233) - `watch` config field in `[index]` section (default `true`) to enable/disable file watching +- Repo map cache with configurable TTL (`repo_map_ttl_secs`, default 300s) to avoid per-message filesystem traversal (#231) +- Cross-session memory score threshold (`cross_session_score_threshold`, default 0.35) to filter low-relevance results (#232) ### Fixed - Persist `MessagePart` data to SQLite via `remember_with_parts()` — pruning state now survives session restarts (#229) - Clear tool output body from memory after Tier 1 pruning to reclaim heap (#230) -### Added -- Repo map cache with configurable TTL (`repo_map_ttl_secs`, default 300s) to avoid per-message filesystem traversal (#231) -- Cross-session memory score threshold (`cross_session_score_threshold`, default 0.35) to filter low-relevance results (#232) - ## [0.9.4] - 2026-02-14 ### Added diff --git a/Cargo.lock b/Cargo.lock index 6fbf56d7..a47dd14a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8323,6 +8323,8 @@ dependencies = [ name = "zeph-tools" version = "0.9.4" dependencies = [ + "glob", + 
"regex", "reqwest 0.13.2", "scrape-core", "serde", diff --git a/Cargo.toml b/Cargo.toml index f7c76f7b..33e6f3ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ crossterm = "0.29" axum = "0.8" blake3 = "1.8" criterion = "0.8" +glob = "0.3.3" futures = "0.3" ignore = "0.4" hf-hub = { version = "0.4", default-features = false, features = ["tokio", "rustls-tls", "ureq"] } @@ -32,6 +33,7 @@ ollama-rs = { version = "0.3", default-features = false, features = ["rustls", " pulldown-cmark = "0.13" qdrant-client = { version = "1.16", default-features = false } ratatui = "0.30" +regex = "1.12" reqwest = { version = "0.13", default-features = false } rmcp = "0.14" scrape-core = "0.2.2" diff --git a/config/default.toml b/config/default.toml index d7af1689..5f961583 100644 --- a/config/default.toml +++ b/config/default.toml @@ -1,6 +1,8 @@ [agent] # Agent display name name = "Zeph" +# Maximum tool execution iterations per user message (doom-loop protection) +max_tool_iterations = 10 [llm] # LLM provider: "ollama" for local models or "claude" for Claude API diff --git a/crates/zeph-core/src/agent.rs b/crates/zeph-core/src/agent.rs index 1cdbce8d..ffcc23fb 100644 --- a/crates/zeph-core/src/agent.rs +++ b/crates/zeph-core/src/agent.rs @@ -26,8 +26,7 @@ use crate::context::{ContextBudget, EnvironmentContext, build_system_prompt}; use crate::redact::redact_secrets; use zeph_memory::semantic::estimate_tokens; -// TODO(M14): Make configurable via AgentConfig (currently hardcoded for MVP) -const MAX_SHELL_ITERATIONS: usize = 3; +const DOOM_LOOP_WINDOW: usize = 3; const MAX_QUEUE_SIZE: usize = 10; const MESSAGE_MERGE_WINDOW: Duration = Duration::from_millis(500); const RECALL_PREFIX: &str = "[semantic recall]\n"; @@ -100,6 +99,8 @@ pub struct Agent #[cfg(feature = "index")] repo_map_ttl: std::time::Duration, warmup_ready: Option>, + max_tool_iterations: usize, + doom_loop_history: Vec, } impl Agent { @@ -118,7 +119,7 @@ impl Agent Agent Self { + self.max_tool_iterations = 
max; + self + } + #[must_use] pub fn with_memory( mut self, @@ -1605,7 +1614,7 @@ impl Agent Agent = if let Some(matcher) = &self.matcher { @@ -1710,8 +1720,18 @@ impl Agent Agent anyhow::Result<()> { - for _ in 0..MAX_SHELL_ITERATIONS { + self.doom_loop_history.clear(); + + for iteration in 0..self.max_tool_iterations { self.channel.send_typing().await?; + // Context budget check at 80% threshold + if let Some(ref budget) = self.context_budget { + let used: usize = self + .messages + .iter() + .map(|m| estimate_tokens(&m.content)) + .sum(); + let threshold = budget.max_tokens() * 4 / 5; + if used >= threshold { + tracing::warn!( + iteration, + used, + threshold, + "stopping tool loop: context budget nearing limit" + ); + self.channel + .send("Stopping: context window is nearly full.") + .await?; + break; + } + } + let Some(response) = self.call_llm_with_timeout().await? else { return Ok(()); }; @@ -1869,6 +1913,25 @@ impl Agent= DOOM_LOOP_WINDOW { + let recent = + &self.doom_loop_history[self.doom_loop_history.len() - DOOM_LOOP_WINDOW..]; + if recent.windows(2).all(|w| w[0] == w[1]) { + tracing::warn!( + iteration, + "doom-loop detected: {DOOM_LOOP_WINDOW} consecutive identical outputs" + ); + self.channel + .send("Stopping: detected repeated identical tool outputs.") + .await?; + break; + } + } + } } Ok(()) @@ -3382,7 +3445,7 @@ mod agent_tests { .iter() .filter(|m| m.role == Role::Assistant) .count(); - assert!(assistant_count <= MAX_SHELL_ITERATIONS); + assert!(assistant_count <= 10); } #[test] @@ -4560,4 +4623,32 @@ mod agent_tests { assert_eq!(filtered[0].summary_text, "high score"); assert_eq!(filtered[1].summary_text, "at threshold"); } + + #[test] + fn doom_loop_detection_triggers_on_identical_outputs() { + let s = "same output".to_owned(); + let history = vec![s.clone(), s.clone(), s]; + let recent = &history[history.len() - DOOM_LOOP_WINDOW..]; + assert!(recent.windows(2).all(|w| w[0] == w[1])); + } + + #[test] + fn 
doom_loop_detection_no_trigger_on_different_outputs() { + let history = vec![ + "output a".to_owned(), + "output b".to_owned(), + "output c".to_owned(), + ]; + let recent = &history[history.len() - DOOM_LOOP_WINDOW..]; + assert!(!recent.windows(2).all(|w| w[0] == w[1])); + } + + #[test] + fn context_budget_80_percent_threshold() { + let budget = ContextBudget::new(1000, 0.20); + let threshold = budget.max_tokens() * 4 / 5; + assert_eq!(threshold, 800); + assert!(800 >= threshold); // at threshold → should stop + assert!(799 < threshold); // below threshold → should continue + } } diff --git a/crates/zeph-core/src/config.rs b/crates/zeph-core/src/config.rs index 04c9ab57..83f5265c 100644 --- a/crates/zeph-core/src/config.rs +++ b/crates/zeph-core/src/config.rs @@ -32,9 +32,15 @@ pub struct Config { pub secrets: ResolvedSecrets, } +fn default_max_tool_iterations() -> usize { + 10 +} + #[derive(Debug, Deserialize)] pub struct AgentConfig { pub name: String, + #[serde(default = "default_max_tool_iterations")] + pub max_tool_iterations: usize, } #[derive(Debug, Deserialize)] @@ -864,6 +870,7 @@ impl Config { Self { agent: AgentConfig { name: "Zeph".into(), + max_tool_iterations: 10, }, llm: LlmConfig { provider: "ollama".into(), diff --git a/crates/zeph-core/src/context.rs b/crates/zeph-core/src/context.rs index 24c1d1cf..46ceae55 100644 --- a/crates/zeph-core/src/context.rs +++ b/crates/zeph-core/src/context.rs @@ -37,7 +37,11 @@ the user explicitly asks about a skill by name.\n\ - Do not execute commands that could cause data loss without confirmation."; #[must_use] -pub fn build_system_prompt(skills_prompt: &str, env: Option<&EnvironmentContext>) -> String { +pub fn build_system_prompt( + skills_prompt: &str, + env: Option<&EnvironmentContext>, + tool_catalog: Option<&str>, +) -> String { let mut prompt = BASE_PROMPT.to_string(); if let Some(env) = env { @@ -45,6 +49,13 @@ pub fn build_system_prompt(skills_prompt: &str, env: Option<&EnvironmentContext> 
prompt.push_str(&env.format()); } + if let Some(catalog) = tool_catalog + && !catalog.is_empty() + { + prompt.push_str("\n\n"); + prompt.push_str(catalog); + } + if !skills_prompt.is_empty() { prompt.push_str("\n\n"); prompt.push_str(skills_prompt); @@ -187,14 +198,14 @@ mod tests { #[test] fn without_skills() { - let prompt = build_system_prompt("", None); + let prompt = build_system_prompt("", None, None); assert!(prompt.starts_with("You are Zeph")); assert!(!prompt.contains("available_skills")); } #[test] fn with_skills() { - let prompt = build_system_prompt("test", None); + let prompt = build_system_prompt("test", None, None); assert!(prompt.contains("You are Zeph")); assert!(prompt.contains("")); } @@ -308,7 +319,7 @@ mod tests { os: "linux".into(), model_name: "test".into(), }; - let prompt = build_system_prompt("skills here", Some(&env)); + let prompt = build_system_prompt("skills here", Some(&env), None); assert!(prompt.contains("You are Zeph")); assert!(prompt.contains("")); assert!(prompt.contains("skills here")); @@ -316,7 +327,7 @@ mod tests { #[test] fn build_system_prompt_without_env() { - let prompt = build_system_prompt("skills here", None); + let prompt = build_system_prompt("skills here", None, None); assert!(prompt.contains("You are Zeph")); assert!(!prompt.contains("")); assert!(prompt.contains("skills here")); @@ -324,7 +335,7 @@ mod tests { #[test] fn base_prompt_contains_guidelines() { - let prompt = build_system_prompt("", None); + let prompt = build_system_prompt("", None, None); assert!(prompt.contains("## Tool Use")); assert!(prompt.contains("## Guidelines")); assert!(prompt.contains("## Security")); diff --git a/crates/zeph-tools/Cargo.toml b/crates/zeph-tools/Cargo.toml index 5a227c52..c660eaf3 100644 --- a/crates/zeph-tools/Cargo.toml +++ b/crates/zeph-tools/Cargo.toml @@ -7,6 +7,8 @@ license.workspace = true repository.workspace = true [dependencies] +glob.workspace = true +regex.workspace = true reqwest = { workspace = true, features 
= ["rustls"] } scrape-core.workspace = true serde = { workspace = true, features = ["derive"] } diff --git a/crates/zeph-tools/src/composite.rs b/crates/zeph-tools/src/composite.rs index 130e800e..3b8394a7 100644 --- a/crates/zeph-tools/src/composite.rs +++ b/crates/zeph-tools/src/composite.rs @@ -1,4 +1,5 @@ -use crate::executor::{ToolError, ToolExecutor, ToolOutput}; +use crate::executor::{ToolCall, ToolError, ToolExecutor, ToolOutput}; +use crate::registry::ToolDef; /// Chains two `ToolExecutor` implementations with first-match-wins dispatch. /// @@ -31,6 +32,19 @@ impl ToolExecutor for CompositeExecutor } self.second.execute_confirmed(response).await } + + fn tool_definitions(&self) -> Vec { + let mut defs = self.first.tool_definitions(); + defs.extend(self.second.tool_definitions()); + defs + } + + async fn execute_tool_call(&self, call: &ToolCall) -> Result, ToolError> { + if let Some(output) = self.first.execute_tool_call(call).await? { + return Ok(Some(output)); + } + self.second.execute_tool_call(call).await + } } #[cfg(test)] @@ -134,4 +148,81 @@ mod tests { let debug = format!("{composite:?}"); assert!(debug.contains("CompositeExecutor")); } + + #[derive(Debug)] + struct FileToolExecutor; + impl ToolExecutor for FileToolExecutor { + async fn execute(&self, _: &str) -> Result, ToolError> { + Ok(None) + } + async fn execute_tool_call( + &self, + call: &ToolCall, + ) -> Result, ToolError> { + if call.tool_id == "read" || call.tool_id == "write" { + Ok(Some(ToolOutput { + tool_name: call.tool_id.clone(), + summary: "file_handler".to_owned(), + blocks_executed: 1, + })) + } else { + Ok(None) + } + } + } + + #[derive(Debug)] + struct ShellToolExecutor; + impl ToolExecutor for ShellToolExecutor { + async fn execute(&self, _: &str) -> Result, ToolError> { + Ok(None) + } + async fn execute_tool_call( + &self, + call: &ToolCall, + ) -> Result, ToolError> { + if call.tool_id == "bash" { + Ok(Some(ToolOutput { + tool_name: "bash".to_owned(), + summary: 
"shell_handler".to_owned(), + blocks_executed: 1, + })) + } else { + Ok(None) + } + } + } + + #[tokio::test] + async fn tool_call_routes_to_file_executor() { + let composite = CompositeExecutor::new(FileToolExecutor, ShellToolExecutor); + let call = ToolCall { + tool_id: "read".to_owned(), + params: std::collections::HashMap::new(), + }; + let result = composite.execute_tool_call(&call).await.unwrap().unwrap(); + assert_eq!(result.summary, "file_handler"); + } + + #[tokio::test] + async fn tool_call_routes_to_shell_executor() { + let composite = CompositeExecutor::new(FileToolExecutor, ShellToolExecutor); + let call = ToolCall { + tool_id: "bash".to_owned(), + params: std::collections::HashMap::new(), + }; + let result = composite.execute_tool_call(&call).await.unwrap().unwrap(); + assert_eq!(result.summary, "shell_handler"); + } + + #[tokio::test] + async fn tool_call_unhandled_returns_none() { + let composite = CompositeExecutor::new(FileToolExecutor, ShellToolExecutor); + let call = ToolCall { + tool_id: "unknown".to_owned(), + params: std::collections::HashMap::new(), + }; + let result = composite.execute_tool_call(&call).await.unwrap(); + assert!(result.is_none()); + } } diff --git a/crates/zeph-tools/src/executor.rs b/crates/zeph-tools/src/executor.rs index 9a288b56..ad511913 100644 --- a/crates/zeph-tools/src/executor.rs +++ b/crates/zeph-tools/src/executor.rs @@ -1,5 +1,13 @@ +use std::collections::HashMap; use std::fmt; +/// Structured tool invocation from LLM. +#[derive(Debug, Clone)] +pub struct ToolCall { + pub tool_id: String, + pub params: HashMap, +} + /// Structured result from tool execution. #[derive(Debug, Clone)] pub struct ToolOutput { @@ -94,6 +102,19 @@ pub trait ToolExecutor: Send + Sync { ) -> impl Future, ToolError>> + Send { self.execute(response) } + + /// Return tool definitions this executor can handle. + fn tool_definitions(&self) -> Vec { + vec![] + } + + /// Execute a structured tool call. Returns `None` if `tool_id` is not handled. 
+ fn execute_tool_call( + &self, + _call: &ToolCall, + ) -> impl Future, ToolError>> + Send { + std::future::ready(Ok(None)) + } } /// Extract fenced code blocks with the given language marker from text. @@ -201,4 +222,23 @@ mod tests { assert!(result.contains("truncated")); assert!(result.contains("chars")); } + + #[derive(Debug)] + struct DefaultExecutor; + impl ToolExecutor for DefaultExecutor { + async fn execute(&self, _response: &str) -> Result, ToolError> { + Ok(None) + } + } + + #[tokio::test] + async fn execute_tool_call_default_returns_none() { + let exec = DefaultExecutor; + let call = ToolCall { + tool_id: "anything".to_owned(), + params: std::collections::HashMap::new(), + }; + let result = exec.execute_tool_call(&call).await.unwrap(); + assert!(result.is_none()); + } } diff --git a/crates/zeph-tools/src/file.rs b/crates/zeph-tools/src/file.rs new file mode 100644 index 00000000..6bb50738 --- /dev/null +++ b/crates/zeph-tools/src/file.rs @@ -0,0 +1,478 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +use crate::executor::{ToolError, ToolOutput}; + +/// File operations executor sandboxed to allowed paths. 
+#[derive(Debug)] +pub struct FileExecutor { + allowed_paths: Vec, +} + +impl FileExecutor { + #[must_use] + pub fn new(allowed_paths: Vec) -> Self { + let paths = if allowed_paths.is_empty() { + vec![std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))] + } else { + allowed_paths + }; + Self { + allowed_paths: paths + .into_iter() + .map(|p| p.canonicalize().unwrap_or(p)) + .collect(), + } + } + + fn validate_path(&self, path: &Path) -> Result { + let resolved = if path.is_absolute() { + path.to_path_buf() + } else { + std::env::current_dir() + .unwrap_or_else(|_| PathBuf::from(".")) + .join(path) + }; + let canonical = resolve_via_ancestors(&resolved); + if !self.allowed_paths.iter().any(|a| canonical.starts_with(a)) { + return Err(ToolError::SandboxViolation { + path: canonical.display().to_string(), + }); + } + Ok(canonical) + } + + /// Execute a tool call by `tool_id` and params. + /// + /// # Errors + /// + /// Returns `ToolError` on sandbox violations or I/O failures. + pub fn execute_file_tool( + &self, + tool_id: &str, + params: &HashMap, + ) -> Result, ToolError> { + match tool_id { + "read" => self.handle_read(params), + "write" => self.handle_write(params), + "edit" => self.handle_edit(params), + "glob" => self.handle_glob(params), + "grep" => self.handle_grep(params), + _ => Ok(None), + } + } + + fn handle_read( + &self, + params: &HashMap, + ) -> Result, ToolError> { + let path_str = param_str(params, "path")?; + let path = self.validate_path(Path::new(&path_str))?; + + let content = std::fs::read_to_string(&path)?; + + let offset = param_usize(params, "offset").unwrap_or(0); + let limit = param_usize(params, "limit").unwrap_or(usize::MAX); + + let selected: Vec = content + .lines() + .skip(offset) + .take(limit) + .enumerate() + .map(|(i, line)| format!("{:>4}\t{line}", offset + i + 1)) + .collect(); + + Ok(Some(ToolOutput { + tool_name: "read".to_owned(), + summary: selected.join("\n"), + blocks_executed: 1, + })) + } + + fn handle_write( 
+ &self, + params: &HashMap, + ) -> Result, ToolError> { + let path_str = param_str(params, "path")?; + let content = param_str(params, "content")?; + let path = Path::new(&path_str); + + self.validate_path(path)?; + + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + std::fs::write(path, &content)?; + + Ok(Some(ToolOutput { + tool_name: "write".to_owned(), + summary: format!("Wrote {} bytes to {path_str}", content.len()), + blocks_executed: 1, + })) + } + + fn handle_edit( + &self, + params: &HashMap, + ) -> Result, ToolError> { + let path_str = param_str(params, "path")?; + let old_string = param_str(params, "old_string")?; + let new_string = param_str(params, "new_string")?; + let path = self.validate_path(Path::new(&path_str))?; + + let content = std::fs::read_to_string(&path)?; + if !content.contains(&old_string) { + return Err(ToolError::Execution(std::io::Error::new( + std::io::ErrorKind::NotFound, + format!("old_string not found in {path_str}"), + ))); + } + + let new_content = content.replacen(&old_string, &new_string, 1); + std::fs::write(&path, &new_content)?; + + Ok(Some(ToolOutput { + tool_name: "edit".to_owned(), + summary: format!("Edited {path_str}"), + blocks_executed: 1, + })) + } + + fn handle_glob( + &self, + params: &HashMap, + ) -> Result, ToolError> { + let pattern = param_str(params, "pattern")?; + let matches: Vec = glob::glob(&pattern) + .map_err(|e| { + ToolError::Execution(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + e.to_string(), + )) + })? 
+ .filter_map(Result::ok) + .filter(|p| { + let canonical = p.canonicalize().unwrap_or_else(|_| p.clone()); + self.allowed_paths.iter().any(|a| canonical.starts_with(a)) + }) + .map(|p| p.display().to_string()) + .collect(); + + Ok(Some(ToolOutput { + tool_name: "glob".to_owned(), + summary: if matches.is_empty() { + format!("No files matching: {pattern}") + } else { + matches.join("\n") + }, + blocks_executed: 1, + })) + } + + fn handle_grep( + &self, + params: &HashMap, + ) -> Result, ToolError> { + let pattern = param_str(params, "pattern")?; + let search_path = params.get("path").and_then(|v| v.as_str()).unwrap_or("."); + let case_sensitive = params + .get("case_sensitive") + .and_then(serde_json::Value::as_bool) + .unwrap_or(true); + + let path = self.validate_path(Path::new(search_path))?; + + let regex = if case_sensitive { + regex::Regex::new(&pattern) + } else { + regex::RegexBuilder::new(&pattern) + .case_insensitive(true) + .build() + } + .map_err(|e| { + ToolError::Execution(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + e.to_string(), + )) + })?; + + let mut results = Vec::new(); + grep_recursive(&path, ®ex, &mut results, 100)?; + + Ok(Some(ToolOutput { + tool_name: "grep".to_owned(), + summary: if results.is_empty() { + format!("No matches for: {pattern}") + } else { + results.join("\n") + }, + blocks_executed: 1, + })) + } +} + +/// Canonicalize a path by walking up to the nearest existing ancestor. 
+fn resolve_via_ancestors(path: &Path) -> PathBuf { + let mut existing = path; + let mut suffix = PathBuf::new(); + while !existing.exists() { + if let Some(parent) = existing.parent() { + if let Some(name) = existing.file_name() { + suffix = PathBuf::from(name).join(&suffix); + } + existing = parent; + } else { + break; + } + } + let base = existing.canonicalize().unwrap_or(existing.to_path_buf()); + if suffix.as_os_str().is_empty() { + base + } else { + base.join(&suffix) + } +} + +const IGNORED_DIRS: &[&str] = &[".git", "target", "node_modules", ".hg"]; + +fn grep_recursive( + path: &Path, + regex: ®ex::Regex, + results: &mut Vec, + limit: usize, +) -> Result<(), ToolError> { + if results.len() >= limit { + return Ok(()); + } + if path.is_file() { + if let Ok(content) = std::fs::read_to_string(path) { + for (i, line) in content.lines().enumerate() { + if regex.is_match(line) { + results.push(format!("{}:{}: {line}", path.display(), i + 1)); + if results.len() >= limit { + return Ok(()); + } + } + } + } + } else if path.is_dir() { + let entries = std::fs::read_dir(path)?; + for entry in entries.flatten() { + let p = entry.path(); + let name = p.file_name().and_then(|n| n.to_str()); + if name.is_some_and(|n| n.starts_with('.') || IGNORED_DIRS.contains(&n)) { + continue; + } + grep_recursive(&p, regex, results, limit)?; + } + } + Ok(()) +} + +fn param_str(params: &HashMap, key: &str) -> Result { + params + .get(key) + .and_then(|v| v.as_str()) + .map(str::to_owned) + .ok_or_else(|| { + ToolError::Execution(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("missing required parameter: {key}"), + )) + }) +} + +fn param_usize(params: &HashMap, key: &str) -> Option { + #[allow(clippy::cast_possible_truncation)] + params + .get(key) + .and_then(serde_json::Value::as_u64) + .map(|n| n as usize) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + fn temp_dir() -> tempfile::TempDir { + tempfile::tempdir().unwrap() + } + + fn 
make_params(pairs: &[(&str, serde_json::Value)]) -> HashMap { + pairs + .iter() + .map(|(k, v)| ((*k).to_owned(), v.clone())) + .collect() + } + + #[test] + fn read_file() { + let dir = temp_dir(); + let file = dir.path().join("test.txt"); + fs::write(&file, "line1\nline2\nline3\n").unwrap(); + + let exec = FileExecutor::new(vec![dir.path().to_path_buf()]); + let params = make_params(&[("path", serde_json::json!(file.to_str().unwrap()))]); + let result = exec.execute_file_tool("read", ¶ms).unwrap().unwrap(); + assert_eq!(result.tool_name, "read"); + assert!(result.summary.contains("line1")); + assert!(result.summary.contains("line3")); + } + + #[test] + fn read_with_offset_and_limit() { + let dir = temp_dir(); + let file = dir.path().join("test.txt"); + fs::write(&file, "a\nb\nc\nd\ne\n").unwrap(); + + let exec = FileExecutor::new(vec![dir.path().to_path_buf()]); + let params = make_params(&[ + ("path", serde_json::json!(file.to_str().unwrap())), + ("offset", serde_json::json!(1)), + ("limit", serde_json::json!(2)), + ]); + let result = exec.execute_file_tool("read", ¶ms).unwrap().unwrap(); + assert!(result.summary.contains("b")); + assert!(result.summary.contains("c")); + assert!(!result.summary.contains("a")); + assert!(!result.summary.contains("d")); + } + + #[test] + fn write_file() { + let dir = temp_dir(); + let file = dir.path().join("out.txt"); + + let exec = FileExecutor::new(vec![dir.path().to_path_buf()]); + let params = make_params(&[ + ("path", serde_json::json!(file.to_str().unwrap())), + ("content", serde_json::json!("hello world")), + ]); + let result = exec.execute_file_tool("write", ¶ms).unwrap().unwrap(); + assert!(result.summary.contains("11 bytes")); + assert_eq!(fs::read_to_string(&file).unwrap(), "hello world"); + } + + #[test] + fn edit_file() { + let dir = temp_dir(); + let file = dir.path().join("edit.txt"); + fs::write(&file, "foo bar baz").unwrap(); + + let exec = FileExecutor::new(vec![dir.path().to_path_buf()]); + let params = 
make_params(&[ + ("path", serde_json::json!(file.to_str().unwrap())), + ("old_string", serde_json::json!("bar")), + ("new_string", serde_json::json!("qux")), + ]); + let result = exec.execute_file_tool("edit", ¶ms).unwrap().unwrap(); + assert!(result.summary.contains("Edited")); + assert_eq!(fs::read_to_string(&file).unwrap(), "foo qux baz"); + } + + #[test] + fn edit_not_found() { + let dir = temp_dir(); + let file = dir.path().join("edit.txt"); + fs::write(&file, "foo bar").unwrap(); + + let exec = FileExecutor::new(vec![dir.path().to_path_buf()]); + let params = make_params(&[ + ("path", serde_json::json!(file.to_str().unwrap())), + ("old_string", serde_json::json!("nonexistent")), + ("new_string", serde_json::json!("x")), + ]); + let result = exec.execute_file_tool("edit", ¶ms); + assert!(result.is_err()); + } + + #[test] + fn sandbox_violation() { + let dir = temp_dir(); + let exec = FileExecutor::new(vec![dir.path().to_path_buf()]); + let params = make_params(&[("path", serde_json::json!("/etc/passwd"))]); + let result = exec.execute_file_tool("read", ¶ms); + assert!(matches!(result, Err(ToolError::SandboxViolation { .. 
}))); + } + + #[test] + fn unknown_tool_returns_none() { + let exec = FileExecutor::new(vec![]); + let params = HashMap::new(); + let result = exec.execute_file_tool("unknown", ¶ms).unwrap(); + assert!(result.is_none()); + } + + #[test] + fn glob_finds_files() { + let dir = temp_dir(); + fs::write(dir.path().join("a.rs"), "").unwrap(); + fs::write(dir.path().join("b.rs"), "").unwrap(); + + let exec = FileExecutor::new(vec![dir.path().to_path_buf()]); + let pattern = format!("{}/*.rs", dir.path().display()); + let params = make_params(&[("pattern", serde_json::json!(pattern))]); + let result = exec.execute_file_tool("glob", ¶ms).unwrap().unwrap(); + assert!(result.summary.contains("a.rs")); + assert!(result.summary.contains("b.rs")); + } + + #[test] + fn grep_finds_matches() { + let dir = temp_dir(); + fs::write( + dir.path().join("test.txt"), + "hello world\nfoo bar\nhello again\n", + ) + .unwrap(); + + let exec = FileExecutor::new(vec![dir.path().to_path_buf()]); + let params = make_params(&[ + ("pattern", serde_json::json!("hello")), + ("path", serde_json::json!(dir.path().to_str().unwrap())), + ]); + let result = exec.execute_file_tool("grep", ¶ms).unwrap().unwrap(); + assert!(result.summary.contains("hello world")); + assert!(result.summary.contains("hello again")); + assert!(!result.summary.contains("foo bar")); + } + + #[test] + fn write_sandbox_bypass_nonexistent_path() { + let dir = temp_dir(); + let exec = FileExecutor::new(vec![dir.path().to_path_buf()]); + let params = make_params(&[ + ("path", serde_json::json!("/tmp/evil/escape.txt")), + ("content", serde_json::json!("pwned")), + ]); + let result = exec.execute_file_tool("write", ¶ms); + assert!(matches!(result, Err(ToolError::SandboxViolation { .. 
}))); + assert!(!Path::new("/tmp/evil/escape.txt").exists()); + } + + #[test] + fn glob_filters_outside_sandbox() { + let sandbox = temp_dir(); + let outside = temp_dir(); + fs::write(outside.path().join("secret.rs"), "secret").unwrap(); + + let exec = FileExecutor::new(vec![sandbox.path().to_path_buf()]); + let pattern = format!("{}/*.rs", outside.path().display()); + let params = make_params(&[("pattern", serde_json::json!(pattern))]); + let result = exec.execute_file_tool("glob", ¶ms).unwrap().unwrap(); + assert!(!result.summary.contains("secret.rs")); + } + + #[test] + fn grep_relative_path_validated() { + let sandbox = temp_dir(); + let exec = FileExecutor::new(vec![sandbox.path().to_path_buf()]); + let params = make_params(&[ + ("pattern", serde_json::json!("password")), + ("path", serde_json::json!("../../etc")), + ]); + let result = exec.execute_file_tool("grep", ¶ms); + assert!(matches!(result, Err(ToolError::SandboxViolation { .. }))); + } +} diff --git a/crates/zeph-tools/src/lib.rs b/crates/zeph-tools/src/lib.rs index 2befe148..32a55c51 100644 --- a/crates/zeph-tools/src/lib.rs +++ b/crates/zeph-tools/src/lib.rs @@ -4,6 +4,8 @@ pub mod audit; pub mod composite; pub mod config; pub mod executor; +pub mod file; +pub mod registry; pub mod scrape; pub mod shell; @@ -11,8 +13,10 @@ pub use audit::{AuditEntry, AuditLogger, AuditResult}; pub use composite::CompositeExecutor; pub use config::{AuditConfig, ScrapeConfig, ShellConfig, ToolsConfig}; pub use executor::{ - MAX_TOOL_OUTPUT_CHARS, ToolError, ToolEvent, ToolEventTx, ToolExecutor, ToolOutput, + MAX_TOOL_OUTPUT_CHARS, ToolCall, ToolError, ToolEvent, ToolEventTx, ToolExecutor, ToolOutput, truncate_tool_output, }; +pub use file::FileExecutor; +pub use registry::ToolRegistry; pub use scrape::WebScrapeExecutor; pub use shell::ShellExecutor; diff --git a/crates/zeph-tools/src/registry.rs b/crates/zeph-tools/src/registry.rs new file mode 100644 index 00000000..b5c1c138 --- /dev/null +++ 
b/crates/zeph-tools/src/registry.rs @@ -0,0 +1,274 @@ +use std::fmt; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ParamType { + String, + Integer, + Boolean, +} + +impl fmt::Display for ParamType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::String => f.write_str("string"), + Self::Integer => f.write_str("integer"), + Self::Boolean => f.write_str("boolean"), + } + } +} + +#[derive(Debug, Clone)] +pub struct ParamDef { + pub name: &'static str, + pub description: &'static str, + pub required: bool, + pub param_type: ParamType, +} + +#[derive(Debug, Clone)] +pub struct ToolDef { + pub id: &'static str, + pub description: &'static str, + pub parameters: Vec, +} + +#[derive(Debug)] +pub struct ToolRegistry { + tools: Vec, +} + +impl ToolRegistry { + #[must_use] + pub fn new() -> Self { + Self { + tools: builtin_tools(), + } + } + + #[must_use] + pub fn tools(&self) -> &[ToolDef] { + &self.tools + } + + #[must_use] + pub fn find(&self, id: &str) -> Option<&ToolDef> { + self.tools.iter().find(|t| t.id == id) + } + + #[must_use] + pub fn format_for_prompt(&self) -> String { + use std::fmt::Write; + let mut out = String::from("\n"); + for tool in &self.tools { + let _ = writeln!(out, "## {}", tool.id); + let _ = writeln!(out, "{}", tool.description); + if !tool.parameters.is_empty() { + let _ = writeln!(out, "Parameters:"); + for p in &tool.parameters { + let req = if p.required { "required" } else { "optional" }; + let _ = writeln!( + out, + " - {}: {} ({}, {})", + p.name, p.description, p.param_type, req + ); + } + } + out.push('\n'); + } + out.push_str(""); + out + } +} + +impl Default for ToolRegistry { + fn default() -> Self { + Self::new() + } +} + +fn builtin_tools() -> Vec { + vec![ + ToolDef { + id: "bash", + description: "Execute a shell command", + parameters: vec![ParamDef { + name: "command", + description: "The bash command to execute", + required: true, + param_type: ParamType::String, + }], + }, + ToolDef 
{ + id: "read", + description: "Read file contents", + parameters: vec![ + ParamDef { + name: "path", + description: "Absolute or relative file path", + required: true, + param_type: ParamType::String, + }, + ParamDef { + name: "offset", + description: "Line number to start reading from", + required: false, + param_type: ParamType::Integer, + }, + ParamDef { + name: "limit", + description: "Number of lines to read", + required: false, + param_type: ParamType::Integer, + }, + ], + }, + ToolDef { + id: "edit", + description: "Replace a string in a file", + parameters: vec![ + ParamDef { + name: "path", + description: "File path to edit", + required: true, + param_type: ParamType::String, + }, + ParamDef { + name: "old_string", + description: "Text to find and replace", + required: true, + param_type: ParamType::String, + }, + ParamDef { + name: "new_string", + description: "Replacement text", + required: true, + param_type: ParamType::String, + }, + ], + }, + ToolDef { + id: "write", + description: "Write content to a file", + parameters: vec![ + ParamDef { + name: "path", + description: "File path to write", + required: true, + param_type: ParamType::String, + }, + ParamDef { + name: "content", + description: "Content to write", + required: true, + param_type: ParamType::String, + }, + ], + }, + ToolDef { + id: "glob", + description: "Find files matching a glob pattern", + parameters: vec![ParamDef { + name: "pattern", + description: "Glob pattern (e.g. 
**/*.rs)", + required: true, + param_type: ParamType::String, + }], + }, + ToolDef { + id: "grep", + description: "Search file contents with regex", + parameters: vec![ + ParamDef { + name: "pattern", + description: "Regex pattern to search for", + required: true, + param_type: ParamType::String, + }, + ParamDef { + name: "path", + description: "Directory or file to search in", + required: false, + param_type: ParamType::String, + }, + ParamDef { + name: "case_sensitive", + description: "Whether search is case-sensitive", + required: false, + param_type: ParamType::Boolean, + }, + ], + }, + ToolDef { + id: "web_scrape", + description: "Scrape data from a web page via CSS selectors", + parameters: vec![ParamDef { + name: "url", + description: "HTTPS URL to scrape", + required: true, + param_type: ParamType::String, + }], + }, + ] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn registry_has_7_builtin_tools() { + let reg = ToolRegistry::new(); + assert_eq!(reg.tools().len(), 7); + } + + #[test] + fn find_existing_tool() { + let reg = ToolRegistry::new(); + assert!(reg.find("bash").is_some()); + assert!(reg.find("read").is_some()); + assert!(reg.find("web_scrape").is_some()); + } + + #[test] + fn find_nonexistent_returns_none() { + let reg = ToolRegistry::new(); + assert!(reg.find("nonexistent").is_none()); + } + + #[test] + fn format_for_prompt_contains_all_tools() { + let reg = ToolRegistry::new(); + let prompt = reg.format_for_prompt(); + assert!(prompt.contains("<available_tools>")); + assert!(prompt.contains("</available_tools>")); + assert!(prompt.contains("## bash")); + assert!(prompt.contains("## read")); + assert!(prompt.contains("## edit")); + assert!(prompt.contains("## write")); + assert!(prompt.contains("## glob")); + assert!(prompt.contains("## grep")); + assert!(prompt.contains("## web_scrape")); + } + + #[test] + fn format_for_prompt_shows_param_info() { + let reg = ToolRegistry::new(); + let prompt = reg.format_for_prompt(); + assert!(prompt.contains("required")); + 
assert!(prompt.contains("optional")); + assert!(prompt.contains("string")); + } + + #[test] + fn param_type_display() { + assert_eq!(ParamType::String.to_string(), "string"); + assert_eq!(ParamType::Integer.to_string(), "integer"); + assert_eq!(ParamType::Boolean.to_string(), "boolean"); + } + + #[test] + fn default_registry() { + let reg = ToolRegistry::default(); + assert_eq!(reg.tools().len(), 7); + } +} diff --git a/src/main.rs b/src/main.rs index beccd480..61ef7725 100644 --- a/src/main.rs +++ b/src/main.rs @@ -303,6 +303,7 @@ async fn main() -> anyhow::Result<()> { config.skills.max_active_skills, tool_executor, ) + .with_max_tool_iterations(config.agent.max_tool_iterations) .with_model_name(config.llm.model.clone()) .with_embedding_model(embed_model.clone()) .with_skill_reload(skill_paths, reload_rx) From 30ed6776762c9f1b862e7a9d36f57506fada5312 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sat, 14 Feb 2026 13:17:08 +0100 Subject: [PATCH 2/2] docs: add tool system guide and update architecture docs for M18 Add guide/tools.md covering tool registry, file executor sandbox, dual-mode execution, iteration cap, and doom-loop detection. Update crates.md, security.md, configuration.md, SUMMARY.md, and README.md. --- README.md | 8 +-- docs/src/SUMMARY.md | 1 + docs/src/architecture/crates.md | 9 ++- docs/src/getting-started/configuration.md | 3 + docs/src/guide/tools.md | 79 +++++++++++++++++++++++ docs/src/security.md | 19 ++++++ 6 files changed, 112 insertions(+), 7 deletions(-) create mode 100644 docs/src/guide/tools.md diff --git a/README.md b/README.md index cbe97bc1..f6448adc 100644 --- a/README.md +++ b/README.md @@ -17,11 +17,11 @@ Lightweight AI agent that routes tasks across **Ollama, Claude, OpenAI, and Hugg **Token-efficient by design.** Most agent frameworks inject every tool and instruction into every prompt. Zeph embeds skills and MCP tools as vectors, then selects only the top-K relevant ones per query via cosine similarity. 
Prompt size stays O(K) — not O(N) — regardless of how many capabilities are installed. -**Intelligent context management.** Two-tier context pruning: Tier 1 selectively removes old tool outputs (clearing bodies from memory after persisting to SQLite) before falling back to Tier 2 LLM-based compaction, reducing unnecessary LLM calls. A token-based protection zone preserves recent context from pruning. Cross-session memory transfers knowledge between conversations with relevance filtering. Proportional budget allocation (8% summaries, 8% semantic recall, 4% cross-session, 30% code context, 50% recent history) keeps conversations efficient. Tool outputs are truncated at 30K chars with optional LLM-based summarization for large outputs. ZEPH.md project config discovery walks up the directory tree and injects project-specific context when available. Config hot-reload applies runtime-safe fields (timeouts, security, memory limits) on file change without restart. +**Intelligent context management.** Two-tier context pruning: Tier 1 selectively removes old tool outputs (clearing bodies from memory after persisting to SQLite) before falling back to Tier 2 LLM-based compaction, reducing unnecessary LLM calls. A token-based protection zone preserves recent context from pruning. Cross-session memory transfers knowledge between conversations with relevance filtering. Proportional budget allocation (8% summaries, 8% semantic recall, 4% cross-session, 30% code context, 50% recent history) keeps conversations efficient. Tool outputs are truncated at 30K chars with optional LLM-based summarization for large outputs. Doom-loop detection breaks runaway tool cycles after 3 identical consecutive outputs, with configurable iteration limits (default 10). ZEPH.md project config discovery walks up the directory tree and injects project-specific context when available. Config hot-reload applies runtime-safe fields (timeouts, security, memory limits) on file change without restart. 
**Run anywhere.** Local models via Ollama or Candle (GGUF with Metal/CUDA), cloud APIs (Claude, OpenAI, GPT-compatible endpoints like Together AI and Groq), or all of them at once through the multi-model orchestrator with automatic fallback chains. -**Production-ready security.** Shell sandboxing with path restrictions, command filtering (12 blocked patterns), destructive command confirmation, secret redaction, audit logging, SSRF protection, and Trivy-scanned container images with 0 HIGH/CRITICAL CVEs. +**Production-ready security.** Shell sandboxing with path restrictions, command filtering (12 blocked patterns), destructive command confirmation, file operation sandbox with path traversal protection, secret redaction, audit logging, SSRF protection, and Trivy-scanned container images with 0 HIGH/CRITICAL CVEs. **Self-improving.** Skills evolve through failure detection, self-reflection, and LLM-generated improvements — with optional manual approval before activation. @@ -99,7 +99,7 @@ cargo build --release --features tui | **Self-Learning** | Skills evolve via failure detection and LLM-generated improvements | [Self-Learning](https://bug-ops.github.io/zeph/guide/self-learning.html) | | **TUI Dashboard** | ratatui terminal UI with markdown rendering, deferred model warmup, scrollbar, mouse scroll, thinking blocks, conversation history, splash screen, live metrics, message queueing (max 10, FIFO with Ctrl+K clear) | [TUI](https://bug-ops.github.io/zeph/guide/tui.html) | | **Multi-Channel I/O** | CLI, Telegram, and TUI with streaming support | [Channels](https://bug-ops.github.io/zeph/guide/channels.html) | -| **Defense-in-Depth** | Shell sandbox, command filter, secret redaction, audit log, SSRF protection | [Security](https://bug-ops.github.io/zeph/security.html) | +| **Defense-in-Depth** | Shell sandbox, file sandbox with path traversal protection, command filter, secret redaction, audit log, SSRF protection, doom-loop detection | 
[Security](https://bug-ops.github.io/zeph/security.html) | ## Architecture @@ -111,7 +111,7 @@ zeph (binary) ├── zeph-memory — SQLite + Qdrant, semantic recall, summarization ├── zeph-index — AST-based code indexing, semantic retrieval, repo map (optional) ├── zeph-channels — Telegram adapter (teloxide) with streaming -├── zeph-tools — shell executor, web scraper, composite tool dispatch +├── zeph-tools — 7 built-in tools (bash, read, edit, write, glob, grep, web_scrape), tool registry, composite dispatch ├── zeph-mcp — MCP client, multi-server lifecycle, unified tool matching ├── zeph-a2a — A2A client + server, agent discovery, JSON-RPC 2.0 └── zeph-tui — ratatui TUI dashboard with live agent metrics (optional) diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index ecb61522..a89a97c7 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -23,6 +23,7 @@ - [A2A Protocol](guide/a2a.md) - [Secrets Management](guide/vault.md) - [Channels (CLI, Telegram, TUI)](guide/channels.md) +- [Tool System](guide/tools.md) - [TUI Dashboard](guide/tui.md) - [Code Indexing](guide/code-indexing.md) diff --git a/docs/src/architecture/crates.md b/docs/src/architecture/crates.md index 35b380b2..91eae6aa 100644 --- a/docs/src/architecture/crates.md +++ b/docs/src/architecture/crates.md @@ -6,7 +6,7 @@ Each workspace crate has a focused responsibility. All leaf crates are independe Agent loop, configuration loading, and context builder. 
-- `Agent` — main agent loop with streaming support and message queue drain +- `Agent` — main agent loop with streaming support, message queue drain, configurable `max_tool_iterations` (default 10), doom-loop detection, and context budget check (stops at 80% threshold) - `Config` — TOML config loading with env var overrides - `Channel` trait — abstraction for I/O (CLI, Telegram, TUI) with `recv()`, `try_recv()`, `send_queue_count()` for queue management - Context builder — assembles system prompt from skills, memory, summaries, environment, and project config @@ -61,10 +61,13 @@ Channel implementations for the Zeph agent. Tool execution abstraction and shell backend. -- `ToolExecutor` trait — accepts LLM response, returns tool output +- `ToolExecutor` trait — accepts LLM response or structured `ToolCall`, returns tool output +- `ToolRegistry` — typed definitions for 7 built-in tools (bash, read, edit, write, glob, grep, web_scrape), injected into system prompt as `<available_tools>` catalog +- `ToolCall` / `execute_tool_call()` — structured tool invocation with typed parameters alongside legacy bash extraction (dual-mode) +- `FileExecutor` — sandboxed file operations (read, write, edit, glob, grep) with ancestor-walk path canonicalization - `ShellExecutor` — bash block parser, command safety filter, sandbox validation - `WebScrapeExecutor` — HTML scraping with CSS selectors, SSRF protection -- `CompositeExecutor` — generic chaining with first-match-wins dispatch +- `CompositeExecutor` — generic chaining with first-match-wins dispatch, routes structured tool calls by `tool_id` to the appropriate backend - `AuditLogger` — structured JSON audit trail for all executions - `truncate_tool_output()` — head+tail split at 30K chars with UTF-8 safe boundaries diff --git a/docs/src/getting-started/configuration.md b/docs/src/getting-started/configuration.md index aecd2039..5d2de397 100644 --- a/docs/src/getting-started/configuration.md +++ b/docs/src/getting-started/configuration.md @@ -30,6 
+30,7 @@ Zeph watches the config file for changes and applies runtime-safe fields without | `[memory]` | `history_limit`, `summarization_threshold`, `context_budget_tokens`, `compaction_threshold`, `compaction_preserve_tail`, `prune_protect_tokens`, `cross_session_score_threshold` | | `[memory.semantic]` | `recall_limit` | | `[index]` | `repo_map_ttl_secs`, `watch` | +| `[agent]` | `max_tool_iterations` | | `[skills]` | `max_active_skills` | **Not reloadable** (require restart): LLM provider/model, SQLite path, Qdrant URL, Telegram token, MCP servers, A2A config, skill paths. @@ -41,6 +42,7 @@ Check for `config reloaded` in the log to confirm a successful reload. ```toml [agent] name = "Zeph" +max_tool_iterations = 10 # Max tool loop iterations per response (default: 10) [llm] provider = "ollama" @@ -141,6 +143,7 @@ rate_limit = 60 | `ZEPH_MEMORY_SEMANTIC_ENABLED` | Enable semantic memory with Qdrant (default: false) | | `ZEPH_MEMORY_RECALL_LIMIT` | Max semantically relevant messages to recall (default: 5) | | `ZEPH_SKILLS_MAX_ACTIVE` | Max skills per query via embedding match (default: 5) | +| `ZEPH_AGENT_MAX_TOOL_ITERATIONS` | Max tool loop iterations per response (default: 10) | | `ZEPH_TOOLS_SUMMARIZE_OUTPUT` | Enable LLM-based tool output summarization (default: false) | | `ZEPH_TOOLS_TIMEOUT` | Shell command timeout in seconds (default: 30) | | `ZEPH_TOOLS_SCRAPE_TIMEOUT` | Web scrape request timeout in seconds (default: 15) | diff --git a/docs/src/guide/tools.md b/docs/src/guide/tools.md new file mode 100644 index 00000000..dc27ecd6 --- /dev/null +++ b/docs/src/guide/tools.md @@ -0,0 +1,79 @@ +# Tool System + +Zeph provides a typed tool system that gives the LLM structured access to file operations, shell commands, and web scraping. The system supports two execution modes: fenced bash block extraction (legacy) and structured tool calls with typed parameters. 
+ +## Tool Registry + +`ToolRegistry` defines 7 built-in tools that are injected into the system prompt as a `<available_tools>` catalog so the LLM knows what is available. + +| Tool ID | Description | Required Parameters | Optional Parameters | +|---------|-------------|---------------------|---------------------| +| `bash` | Execute a shell command | `command` (string) | | +| `read` | Read file contents | `path` (string) | `offset` (integer), `limit` (integer) | +| `edit` | Replace a string in a file | `path` (string), `old_string` (string), `new_string` (string) | | +| `write` | Write content to a file | `path` (string), `content` (string) | | +| `glob` | Find files matching a glob pattern | `pattern` (string) | | +| `grep` | Search file contents with regex | `pattern` (string) | `path` (string), `case_sensitive` (boolean) | +| `web_scrape` | Scrape data from a web page via CSS selectors | `url` (string) | | + +## FileExecutor + +`FileExecutor` handles the file-oriented tools (`read`, `write`, `edit`, `glob`, `grep`) in a sandboxed environment. All file paths are validated against an allowlist before any I/O operation. + +- If `allowed_paths` is empty, the sandbox defaults to the current working directory. +- Paths are resolved via ancestor-walk canonicalization to prevent traversal attacks on non-existing paths. +- `glob` results are filtered post-match to exclude files outside the sandbox. +- `grep` validates the search directory before scanning. + +See [Security](../security.md#file-executor-sandbox) for details on the path validation mechanism. + +## Dual-Mode Execution + +The agent loop supports two tool invocation modes: + +1. **Bash extraction** -- the original mode. The LLM emits fenced ` ```bash ``` ` blocks, and `ShellExecutor` parses and runs them through the safety filter. +2. **Structured tool calls** -- the LLM emits a `ToolCall` with `tool_id` and typed `params`. 
`CompositeExecutor` routes the call to the appropriate backend (`FileExecutor` for file tools, `ShellExecutor` for `bash`, `WebScrapeExecutor` for `web_scrape`). + +Both modes coexist in the same iteration. The agent first checks for structured tool calls, then falls back to bash block extraction. + +## Iteration Control + +The agent loop iterates tool execution until the LLM produces a response with no tool invocations, or one of the safety limits is hit. + +### Iteration cap + +Controlled by `max_tool_iterations` (default: 10). The previous hardcoded limit of 3 is replaced by this configurable value. + +```toml +[agent] +max_tool_iterations = 10 +``` + +Environment variable: `ZEPH_AGENT_MAX_TOOL_ITERATIONS`. + +### Doom-loop detection + +If 3 consecutive tool iterations produce identical output strings, the loop breaks and the agent notifies the user. This prevents infinite loops where the LLM repeatedly issues the same failing command. + +### Context budget check + +At the start of each iteration, the agent estimates total token usage. If usage exceeds 80% of the configured `context_budget_tokens`, the loop stops to avoid exceeding the model's context window. 
+ +## Configuration + +```toml +[agent] +max_tool_iterations = 10 # Max tool loop iterations (default: 10) + +[tools] +enabled = true +summarize_output = false + +[tools.shell] +timeout = 30 +allowed_paths = [] # Sandbox directories (empty = cwd only) +``` + +| Variable | Description | +|----------|-------------| +| `ZEPH_AGENT_MAX_TOOL_ITERATIONS` | Max tool loop iterations (default: 10) | diff --git a/docs/src/security.md b/docs/src/security.md index 69e713b9..3023445d 100644 --- a/docs/src/security.md +++ b/docs/src/security.md @@ -47,6 +47,25 @@ Commands matching `confirm_patterns` trigger an interactive confirmation before - Default patterns: `rm`, `git push -f`, `git push --force`, `drop table`, `drop database`, `truncate` - Configurable via `tools.shell.confirm_patterns` in TOML +## File Executor Sandbox + +`FileExecutor` enforces the same `allowed_paths` sandbox as the shell executor for all file operations (`read`, `write`, `edit`, `glob`, `grep`). + +**Path validation:** +- All paths are resolved to absolute form and canonicalized before access +- Non-existing paths (e.g., for `write`) use ancestor-walk canonicalization: the resolver walks up the path tree to the nearest existing ancestor, canonicalizes it, then re-appends the remaining segments. This prevents symlink and `..` traversal on paths that do not yet exist on disk +- If the resolved path does not fall under any entry in `allowed_paths`, the operation is rejected with a `SandboxViolation` error + +**Glob and grep enforcement:** +- `glob` results are post-filtered: matched paths outside the sandbox are silently excluded +- `grep` validates the search root directory before scanning begins + +**Configuration** is shared with the shell sandbox: +```toml +[tools.shell] +allowed_paths = ["/home/user/workspace"] # Empty = cwd only +``` + ## Audit Logging Structured JSON audit log for all tool executions: