From 77c3a59ffec6b44dfc4209c4c412c8755ef45cc4 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sun, 15 Feb 2026 19:22:51 +0100 Subject: [PATCH 01/11] feat: add native tool_use for Claude and OpenAI providers (#253) Extend LlmProvider trait with supports_tool_use() and chat_with_tools() default methods. Add ToolDefinition, ChatResponse, and ToolUseRequest types to zeph-llm. Add ToolUse/ToolResult variants to MessagePart with serde support and flatten_parts() handling. Implement Anthropic tool_use format in ClaudeProvider with structured content blocks for tool_use/tool_result messages. Implement OpenAI function calling format in OpenAiProvider with tool_calls parsing and tool role messages. Add dual-mode agent loop: process_response() branches on supports_tool_use() into native tool path with structured execution loop, doom-loop detection, and context budget checks. Legacy text extraction path unchanged for Ollama/Candle providers. Delegate new trait methods through AnyProvider. Backward compatible via default implementations. Closes #254, closes #255, closes #256, closes #257, closes #258 --- crates/zeph-core/src/agent/streaming.rs | 207 ++++++++++++- crates/zeph-llm/src/any.rs | 14 +- crates/zeph-llm/src/claude.rs | 364 ++++++++++++++++++++++- crates/zeph-llm/src/openai.rs | 380 +++++++++++++++++++++++- crates/zeph-llm/src/provider.rs | 203 +++++++++++++ 5 files changed, 1159 insertions(+), 9 deletions(-) diff --git a/crates/zeph-core/src/agent/streaming.rs b/crates/zeph-core/src/agent/streaming.rs index 323707ef..26dce2b2 100644 --- a/crates/zeph-core/src/agent/streaming.rs +++ b/crates/zeph-core/src/agent/streaming.rs @@ -1,6 +1,6 @@ use tokio_stream::StreamExt; -use zeph_llm::provider::{LlmProvider, Message, MessagePart, Role}; -use zeph_tools::executor::{ToolError, ToolExecutor, ToolOutput}; +use zeph_llm::provider::{ChatResponse, LlmProvider, Message, MessagePart, Role, ToolDefinition}; +use zeph_tools::executor::{ToolCall, ToolError, ToolExecutor, ToolOutput}; use crate::channel::Channel; use crate::redact::redact_secrets; @@ -10,6 +10,10 @@ use super::{Agent, DOOM_LOOP_WINDOW, format_tool_output}; impl Agent { pub(crate) async fn process_response(&mut self) -> Result<(), super::error::AgentError> { + if self.provider.supports_tool_use() { + return self.process_response_native_tools().await; + } + self.doom_loop_history.clear(); for iteration in 0..self.max_tool_iterations { @@ -347,4 +351,203 @@ impl Agent Result<(), super::error::AgentError> { + self.doom_loop_history.clear(); + + let tool_defs: Vec = self + .tool_executor + .tool_definitions() + .iter() + .map(tool_def_to_definition) + .collect(); + + for iteration in 0..self.max_tool_iterations { + self.channel.send_typing().await?; + + if let Some(ref budget) = self.context_state.budget { + let used: usize = self + .messages + .iter() + .map(|m| estimate_tokens(&m.content)) + .sum(); + let threshold = budget.max_tokens() * 4 / 5; + if used >= threshold { + tracing::warn!( + iteration, + used, + threshold, + "stopping tool loop: context budget nearing limit" + ); + self.channel + .send("Stopping: context window is nearly full.") + .await?; + break; + } + } + + let chat_result = self.call_chat_with_tools(&tool_defs).await?; + + let Some(chat_result) = chat_result else { + return Ok(()); + }; + + // Text/Done → display and return + if let ChatResponse::Text(text) = &chat_result { + if !text.is_empty() { + let display = self.maybe_redact(text); + self.channel.send(&display).await?; + } + self.messages + .push(Message::from_legacy(Role::Assistant, text.as_str())); + self.persist_message(Role::Assistant, text).await; + return Ok(()); + } + + // ToolUse → execute tools and loop + let ChatResponse::ToolUse { text, tool_calls } = chat_result else { + unreachable!(); + }; + self.handle_native_tool_calls(text.as_deref(), &tool_calls) + .await?; + + if self.check_doom_loop(iteration).await? { + break; + } + } + + Ok(()) + } + + async fn call_chat_with_tools( + &mut self, + tool_defs: &[ToolDefinition], + ) -> Result, super::error::AgentError> { + let llm_timeout = std::time::Duration::from_secs(self.timeouts.llm_seconds); + let start = std::time::Instant::now(); + + let result = if let Ok(result) = tokio::time::timeout( + llm_timeout, + self.provider.chat_with_tools(&self.messages, tool_defs), + ) + .await + { + result? + } else { + self.channel + .send("LLM request timed out. Please try again.") + .await?; + return Ok(None); + }; + + let latency = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX); + self.update_metrics(|m| { + m.api_calls += 1; + m.last_llm_latency_ms = latency; + }); + + Ok(Some(result)) + } + + async fn handle_native_tool_calls( + &mut self, + text: Option<&str>, + tool_calls: &[zeph_llm::provider::ToolUseRequest], + ) -> Result<(), super::error::AgentError> { + if let Some(t) = text + && !t.is_empty() + { + let display = self.maybe_redact(t); + self.channel.send(&display).await?; + } + + let mut parts: Vec = Vec::new(); + if let Some(t) = text + && !t.is_empty() + { + parts.push(MessagePart::Text { text: t.to_owned() }); + } + for tc in tool_calls { + parts.push(MessagePart::ToolUse { + id: tc.id.clone(), + name: tc.name.clone(), + input: tc.input.clone(), + }); + } + let assistant_msg = Message::from_parts(Role::Assistant, parts); + self.persist_message(Role::Assistant, &assistant_msg.content) + .await; + self.messages.push(assistant_msg); + + let mut result_parts: Vec = Vec::new(); + for tc in tool_calls { + let params: std::collections::HashMap = + if let serde_json::Value::Object(map) = &tc.input { + map.iter().map(|(k, v)| (k.clone(), v.clone())).collect() + } else { + std::collections::HashMap::new() + }; + + let call = ToolCall { + tool_id: tc.name.clone(), + params, + }; + + let (output, is_error) = match self.tool_executor.execute_tool_call(&call).await { + Ok(Some(out)) => (out.summary, false), + Ok(None) => ("(no output)".to_owned(), false), + Err(e) => (format!("[error] {e}"), true), + }; + + let processed = self.maybe_summarize_tool_output(&output).await; + let formatted = format_tool_output(&tc.name, &processed); + let display = self.maybe_redact(&formatted); + self.channel.send(&display).await?; + + result_parts.push(MessagePart::ToolResult { + tool_use_id: tc.id.clone(), + content: processed, + is_error, + }); + } + + let user_msg = Message::from_parts(Role::User, result_parts); + self.persist_message(Role::User, &user_msg.content).await; + self.messages.push(user_msg); + + Ok(()) + } + + /// Returns `true` if a doom loop was detected and the caller should break. + async fn check_doom_loop( + &mut self, + iteration: usize, + ) -> Result { + if let Some(last_msg) = self.messages.last() { + self.doom_loop_history.push(last_msg.content.clone()); + if self.doom_loop_history.len() >= DOOM_LOOP_WINDOW { + let recent = + &self.doom_loop_history[self.doom_loop_history.len() - DOOM_LOOP_WINDOW..]; + if recent.windows(2).all(|w| w[0] == w[1]) { + tracing::warn!( + iteration, + "doom-loop detected: {DOOM_LOOP_WINDOW} consecutive identical outputs" + ); + self.channel + .send("Stopping: detected repeated identical tool outputs.") + .await?; + return Ok(true); + } + } + } + Ok(false) + } +} + +fn tool_def_to_definition(def: &zeph_tools::registry::ToolDef) -> ToolDefinition { + ToolDefinition { + name: def.id.to_string(), + description: def.description.to_string(), + parameters: serde_json::to_value(&def.schema).unwrap_or_default(), + } } diff --git a/crates/zeph-llm/src/any.rs b/crates/zeph-llm/src/any.rs index 37df907b..f01b3d73 100644 --- a/crates/zeph-llm/src/any.rs +++ b/crates/zeph-llm/src/any.rs @@ -6,7 +6,7 @@ use crate::ollama::OllamaProvider; use crate::openai::OpenAiProvider; #[cfg(feature = "orchestrator")] use crate::orchestrator::ModelOrchestrator; -use crate::provider::{ChatStream, LlmProvider, Message, StatusTx}; +use crate::provider::{ChatResponse, ChatStream, LlmProvider, Message, StatusTx, ToolDefinition}; /// Generates a match over all `AnyProvider` variants, binding the inner provider /// and evaluating the given closure for each arm. @@ -86,6 +86,18 @@ impl LlmProvider for AnyProvider { fn name(&self) -> &'static str { delegate_provider!(self, |p| p.name()) } + + fn supports_tool_use(&self) -> bool { + delegate_provider!(self, |p| p.supports_tool_use()) + } + + async fn chat_with_tools( + &self, + messages: &[Message], + tools: &[ToolDefinition], + ) -> Result { + delegate_provider!(self, |p| p.chat_with_tools(messages, tools).await) + } } #[cfg(test)] diff --git a/crates/zeph-llm/src/claude.rs b/crates/zeph-llm/src/claude.rs index fef5b749..c317cff0 100644 --- a/crates/zeph-llm/src/claude.rs +++ b/crates/zeph-llm/src/claude.rs @@ -6,7 +6,10 @@ use eventsource_stream::Eventsource; use serde::{Deserialize, Serialize}; use tokio_stream::StreamExt; -use crate::provider::{ChatStream, LlmProvider, Message, Role, StatusTx}; +use crate::provider::{ + ChatResponse, ChatStream, LlmProvider, Message, MessagePart, Role, StatusTx, ToolDefinition, + ToolUseRequest, +}; const API_URL: &str = "https://api.anthropic.com/v1/messages"; const ANTHROPIC_VERSION: &str = "2023-06-01"; @@ -225,6 +228,77 @@ impl LlmProvider for ClaudeProvider { fn name(&self) -> &'static str { "claude" } + + fn supports_tool_use(&self) -> bool { + true + } + + async fn chat_with_tools( + &self, + messages: &[Message], + tools: &[ToolDefinition], + ) -> Result { + let (system, chat_messages) = split_messages_structured(messages); + let api_tools: Vec = tools + .iter() + .map(|t| AnthropicTool { + name: &t.name, + description: &t.description, + input_schema: &t.parameters, + }) + .collect(); + + let body = ToolRequestBody { + model: &self.model, + max_tokens: self.max_tokens, + system: system.as_deref(), + messages: &chat_messages, + tools: &api_tools, + }; + + for attempt in 0..=MAX_RETRIES { + let response = self + .client + .post(API_URL) + .header("x-api-key", &self.api_key) + .header("anthropic-version", ANTHROPIC_VERSION) + .header("content-type", "application/json") + .json(&body) + .send() + .await?; + + let status = response.status(); + + if status == reqwest::StatusCode::TOO_MANY_REQUESTS { + if attempt == MAX_RETRIES { + return Err(LlmError::RateLimited); + } + let delay = retry_delay(&response, attempt); + self.emit_status(format!( + "Claude rate limited, retrying in {}s ({}/{})", + delay.as_secs(), + attempt + 1, + MAX_RETRIES + )); + tokio::time::sleep(delay).await; + continue; + } + + let text = response.text().await.map_err(LlmError::Http)?; + + if !status.is_success() { + tracing::error!("Claude API error {status}: {text}"); + return Err(LlmError::Other(format!( + "Claude API request failed (status {status})" + ))); + } + + let resp: ToolApiResponse = serde_json::from_str(&text)?; + return Ok(parse_tool_response(resp)); + } + + Err(LlmError::RateLimited) + } } fn retry_delay(response: &reqwest::Response, attempt: u32) -> Duration { @@ -301,6 +375,170 @@ fn split_messages(messages: &[Message]) -> (Option, Vec>) (system, chat) } +#[derive(Serialize)] +struct AnthropicTool<'a> { + name: &'a str, + description: &'a str, + input_schema: &'a serde_json::Value, +} + +#[derive(Serialize)] +struct ToolRequestBody<'a> { + model: &'a str, + max_tokens: u32, + #[serde(skip_serializing_if = "Option::is_none")] + system: Option<&'a str>, + messages: &'a [StructuredApiMessage], + tools: &'a [AnthropicTool<'a>], +} + +#[derive(Serialize)] +struct StructuredApiMessage { + role: String, + content: StructuredContent, +} + +#[derive(Serialize)] +#[serde(untagged)] +enum StructuredContent { + Text(String), + Blocks(Vec), +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(tag = "type", rename_all = "snake_case")] +enum AnthropicContentBlock { + Text { + text: String, + }, + ToolUse { + id: String, + name: String, + input: serde_json::Value, + }, + ToolResult { + tool_use_id: String, + content: String, + #[serde(default, skip_serializing_if = "std::ops::Not::not")] + is_error: bool, + }, +} + +#[derive(Deserialize)] +struct ToolApiResponse { + content: Vec, +} + +fn parse_tool_response(resp: ToolApiResponse) -> ChatResponse { + let mut text_parts = Vec::new(); + let mut tool_calls = Vec::new(); + + for block in resp.content { + match block { + AnthropicContentBlock::Text { text } => text_parts.push(text), + AnthropicContentBlock::ToolUse { id, name, input } => { + tool_calls.push(ToolUseRequest { id, name, input }); + } + AnthropicContentBlock::ToolResult { .. } => {} + } + } + + if tool_calls.is_empty() { + let combined = text_parts.join(""); + ChatResponse::Text(combined) + } else { + let text = if text_parts.is_empty() { + None + } else { + Some(text_parts.join("")) + }; + ChatResponse::ToolUse { text, tool_calls } + } +} + +fn split_messages_structured(messages: &[Message]) -> (Option, Vec) { + let mut system_parts = Vec::new(); + let mut chat = Vec::new(); + + for msg in messages { + match msg.role { + Role::System => system_parts.push(msg.to_llm_content()), + Role::User | Role::Assistant => { + let role = if msg.role == Role::User { + "user" + } else { + "assistant" + }; + let has_tool_parts = msg.parts.iter().any(|p| { + matches!( + p, + MessagePart::ToolUse { .. } | MessagePart::ToolResult { .. } + ) + }); + + if has_tool_parts { + let mut blocks = Vec::new(); + for part in &msg.parts { + match part { + MessagePart::Text { text } + | MessagePart::Recall { text } + | MessagePart::CodeContext { text } + | MessagePart::Summary { text } + | MessagePart::CrossSession { text } => { + if !text.is_empty() { + blocks.push(AnthropicContentBlock::Text { text: text.clone() }); + } + } + MessagePart::ToolOutput { + tool_name, body, .. + } => { + blocks.push(AnthropicContentBlock::Text { + text: format!("[tool output: {tool_name}]\n{body}"), + }); + } + MessagePart::ToolUse { id, name, input } => { + blocks.push(AnthropicContentBlock::ToolUse { + id: id.clone(), + name: name.clone(), + input: input.clone(), + }); + } + MessagePart::ToolResult { + tool_use_id, + content, + is_error, + } => { + blocks.push(AnthropicContentBlock::ToolResult { + tool_use_id: tool_use_id.clone(), + content: content.clone(), + is_error: *is_error, + }); + } + } + } + chat.push(StructuredApiMessage { + role: role.to_owned(), + content: StructuredContent::Blocks(blocks), + }); + } else { + chat.push(StructuredApiMessage { + role: role.to_owned(), + content: StructuredContent::Text(msg.to_llm_content().to_owned()), + }); + } + } + } + } + + let system = if system_parts.is_empty() { + None + } else { + Some(system_parts.join("\n\n")) + }; + + (system, chat) +} + #[derive(Serialize)] struct RequestBody<'a> { model: &'a str, @@ -937,6 +1175,130 @@ mod tests { assert!(stream_response.contains('4')); } + #[test] + fn anthropic_tool_serialization() { + let tool = AnthropicTool { + name: "bash", + description: "Execute a shell command", + input_schema: &serde_json::json!({ + "type": "object", + "properties": { + "command": {"type": "string"} + }, + "required": ["command"] + }), + }; + let json = serde_json::to_string(&tool).unwrap(); + assert!(json.contains("\"name\":\"bash\"")); + assert!(json.contains("\"input_schema\"")); + } + + #[test] + fn parse_tool_response_text_only() { + let resp = ToolApiResponse { + content: vec![AnthropicContentBlock::Text { + text: "Hello".into(), + }], + }; + let result = parse_tool_response(resp); + assert!(matches!(result, ChatResponse::Text(s) if s == "Hello")); + } + + #[test] + fn parse_tool_response_with_tool_use() { + let resp = ToolApiResponse { + content: vec![ + AnthropicContentBlock::Text { + text: "I'll run that".into(), + }, + AnthropicContentBlock::ToolUse { + id: "toolu_123".into(), + name: "bash".into(), + input: serde_json::json!({"command": "ls"}), + }, + ], + }; + let result = parse_tool_response(resp); + if let ChatResponse::ToolUse { text, tool_calls } = result { + assert_eq!(text.unwrap(), "I'll run that"); + assert_eq!(tool_calls.len(), 1); + assert_eq!(tool_calls[0].name, "bash"); + assert_eq!(tool_calls[0].id, "toolu_123"); + } else { + panic!("expected ToolUse"); + } + } + + #[test] + fn parse_tool_response_tool_use_only() { + let resp = ToolApiResponse { + content: vec![AnthropicContentBlock::ToolUse { + id: "toolu_456".into(), + name: "read".into(), + input: serde_json::json!({"path": "/tmp/file.txt"}), + }], + }; + let result = parse_tool_response(resp); + if let ChatResponse::ToolUse { text, tool_calls } = result { + assert!(text.is_none()); + assert_eq!(tool_calls.len(), 1); + } else { + panic!("expected ToolUse"); + } + } + + #[test] + fn parse_tool_response_json_deserialization() { + let json = r#"{"content":[{"type":"text","text":"Let me check"},{"type":"tool_use","id":"toolu_abc","name":"bash","input":{"command":"ls"}}]}"#; + let resp: ToolApiResponse = serde_json::from_str(json).unwrap(); + let result = parse_tool_response(resp); + assert!(matches!(result, ChatResponse::ToolUse { .. })); + } + + #[test] + fn split_messages_structured_with_tool_parts() { + let messages = vec![ + Message::from_parts( + Role::Assistant, + vec![ + MessagePart::Text { + text: "I'll run that".into(), + }, + MessagePart::ToolUse { + id: "t1".into(), + name: "bash".into(), + input: serde_json::json!({"command": "ls"}), + }, + ], + ), + Message::from_parts( + Role::User, + vec![MessagePart::ToolResult { + tool_use_id: "t1".into(), + content: "file1.rs".into(), + is_error: false, + }], + ), + ]; + let (system, chat) = split_messages_structured(&messages); + assert!(system.is_none()); + assert_eq!(chat.len(), 2); + + let assistant_json = serde_json::to_string(&chat[0]).unwrap(); + assert!(assistant_json.contains("tool_use")); + assert!(assistant_json.contains("\"id\":\"t1\"")); + + let user_json = serde_json::to_string(&chat[1]).unwrap(); + assert!(user_json.contains("tool_result")); + assert!(user_json.contains("\"tool_use_id\":\"t1\"")); + } + + #[test] + fn supports_tool_use_returns_true() { + let provider = ClaudeProvider::new("key".into(), "claude-sonnet-4-5-20250929".into(), 1024); + assert!(provider.supports_tool_use()); + } + #[test] fn backoff_constants() { assert_eq!(MAX_RETRIES, 3); diff --git a/crates/zeph-llm/src/openai.rs b/crates/zeph-llm/src/openai.rs index 6e7bf95a..68a94331 100644 --- a/crates/zeph-llm/src/openai.rs +++ b/crates/zeph-llm/src/openai.rs @@ -6,7 +6,10 @@ use eventsource_stream::Eventsource; use serde::{Deserialize, Serialize}; use tokio_stream::StreamExt; -use crate::provider::{ChatStream, LlmProvider, Message, Role, StatusTx}; +use crate::provider::{ + ChatResponse, ChatStream, LlmProvider, Message, MessagePart, Role, StatusTx, ToolDefinition, + ToolUseRequest, +}; pub struct OpenAiProvider { client: reqwest::Client, @@ -124,7 +127,7 @@ impl OpenAiProvider { ))); } - let resp: ChatResponse = serde_json::from_str(&text)?; + let resp: OpenAiChatResponse = serde_json::from_str(&text)?; resp.choices .first() @@ -271,6 +274,101 @@ impl LlmProvider for OpenAiProvider { fn name(&self) -> &'static str { "openai" } + + fn supports_tool_use(&self) -> bool { + true + } + + async fn chat_with_tools( + &self, + messages: &[Message], + tools: &[ToolDefinition], + ) -> Result { + let api_messages = convert_messages_structured(messages); + let reasoning = self + .reasoning_effort + .as_deref() + .map(|effort| Reasoning { effort }); + + let api_tools: Vec = tools + .iter() + .map(|t| OpenAiTool { + r#type: "function", + function: OpenAiFunction { + name: &t.name, + description: &t.description, + parameters: &t.parameters, + }, + }) + .collect(); + + let body = ToolChatRequest { + model: &self.model, + messages: &api_messages, + max_tokens: self.max_tokens, + tools: &api_tools, + reasoning, + }; + + let response = self + .client + .post(format!("{}/chat/completions", self.base_url)) + .header("Authorization", format!("Bearer {}", self.api_key)) + .header("Content-Type", "application/json") + .json(&body) + .send() + .await?; + + let status = response.status(); + let text = response.text().await.map_err(LlmError::Http)?; + + if status == reqwest::StatusCode::TOO_MANY_REQUESTS { + return Err(LlmError::RateLimited); + } + + if !status.is_success() { + tracing::error!("OpenAI API error {status}: {text}"); + return Err(LlmError::Other(format!( + "OpenAI API request failed (status {status})" + ))); + } + + let resp: ToolChatResponse = serde_json::from_str(&text)?; + + let choice = resp + .choices + .into_iter() + .next() + .ok_or(LlmError::EmptyResponse { provider: "openai" })?; + + if let Some(tool_calls) = choice.message.tool_calls + && !tool_calls.is_empty() + { + let text = if choice.message.content.is_empty() { + None + } else { + Some(choice.message.content) + }; + let calls = tool_calls + .into_iter() + .map(|tc| { + let input = serde_json::from_str(&tc.function.arguments) + .unwrap_or(serde_json::Value::Object(serde_json::Map::new())); + ToolUseRequest { + id: tc.id, + name: tc.function.name, + input, + } + }) + .collect(); + return Ok(ChatResponse::ToolUse { + text, + tool_calls: calls, + }); + } + + Ok(ChatResponse::Text(choice.message.content)) + } } fn parse_sse_event(data: &str) -> Option> { @@ -338,7 +436,7 @@ struct ApiMessage<'a> { } #[derive(Deserialize)] -struct ChatResponse { +struct OpenAiChatResponse { choices: Vec, } @@ -368,6 +466,178 @@ struct StreamDelta { content: Option, } +#[derive(Serialize)] +struct OpenAiTool<'a> { + r#type: &'a str, + function: OpenAiFunction<'a>, +} + +#[derive(Serialize)] +struct OpenAiFunction<'a> { + name: &'a str, + description: &'a str, + parameters: &'a serde_json::Value, +} + +#[derive(Serialize)] +struct ToolChatRequest<'a> { + model: &'a str, + messages: &'a [StructuredApiMessage], + max_tokens: u32, + tools: &'a [OpenAiTool<'a>], + #[serde(skip_serializing_if = "Option::is_none")] + reasoning: Option>, +} + +#[derive(Serialize)] +struct StructuredApiMessage { + role: String, + content: String, + #[serde(skip_serializing_if = "Option::is_none")] + tool_calls: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + tool_call_id: Option, +} + +#[derive(Serialize)] +struct OpenAiToolCallOut { + id: String, + r#type: String, + function: OpenAiFunctionCall, +} + +#[derive(Serialize)] +struct OpenAiFunctionCall { + name: String, + arguments: String, +} + +#[derive(Deserialize)] +struct ToolChatResponse { + choices: Vec, +} + +#[derive(Deserialize)] +struct ToolChatChoice { + message: ToolChatMessage, +} + +#[derive(Deserialize)] +struct ToolChatMessage { + #[serde(default)] + content: String, + #[serde(default)] + tool_calls: Option>, +} + +#[derive(Deserialize)] +struct OpenAiToolCall { + id: String, + function: OpenAiToolCallFunction, +} + +#[derive(Deserialize)] +struct OpenAiToolCallFunction { + name: String, + arguments: String, +} + +fn convert_messages_structured(messages: &[Message]) -> Vec { + let mut result = Vec::new(); + + for msg in messages { + let has_tool_parts = msg.parts.iter().any(|p| { + matches!( + p, + MessagePart::ToolUse { .. } | MessagePart::ToolResult { .. } + ) + }); + + if has_tool_parts { + // Assistant messages with ToolUse parts → tool_calls field + if msg.role == Role::Assistant { + let text_content: String = msg + .parts + .iter() + .filter_map(|p| match p { + MessagePart::Text { text } => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join(""); + + let tool_calls: Vec = msg + .parts + .iter() + .filter_map(|p| match p { + MessagePart::ToolUse { id, name, input } => Some(OpenAiToolCallOut { + id: id.clone(), + r#type: "function".to_owned(), + function: OpenAiFunctionCall { + name: name.clone(), + arguments: serde_json::to_string(input).unwrap_or_default(), + }, + }), + _ => None, + }) + .collect(); + + result.push(StructuredApiMessage { + role: "assistant".to_owned(), + content: text_content, + tool_calls: if tool_calls.is_empty() { + None + } else { + Some(tool_calls) + }, + tool_call_id: None, + }); + } else { + // User messages with ToolResult parts → role: "tool" messages + for part in &msg.parts { + match part { + MessagePart::ToolResult { + tool_use_id, + content, + .. + } => { + result.push(StructuredApiMessage { + role: "tool".to_owned(), + content: content.clone(), + tool_calls: None, + tool_call_id: Some(tool_use_id.clone()), + }); + } + MessagePart::Text { text } if !text.is_empty() => { + result.push(StructuredApiMessage { + role: "user".to_owned(), + content: text.clone(), + tool_calls: None, + tool_call_id: None, + }); + } + _ => {} + } + } + } + } else { + let role = match msg.role { + Role::System => "system", + Role::User => "user", + Role::Assistant => "assistant", + }; + result.push(StructuredApiMessage { + role: role.to_owned(), + content: msg.to_llm_content().to_owned(), + tool_calls: None, + tool_call_id: None, + }); + } + } + + result +} + #[derive(Serialize)] struct EmbeddingRequest<'a> { input: &'a str, @@ -558,7 +828,7 @@ mod tests { #[test] fn parse_chat_response() { let json = r#"{"choices":[{"message":{"content":"Hello!"}}]}"#; - let resp: ChatResponse = serde_json::from_str(json).unwrap(); + let resp: OpenAiChatResponse = serde_json::from_str(json).unwrap(); assert_eq!(resp.choices.len(), 1); assert_eq!(resp.choices[0].message.content, "Hello!"); } @@ -745,7 +1015,7 @@ mod tests { #[test] fn chat_response_empty_choices() { let json = r#"{"choices":[]}"#; - let resp: ChatResponse = serde_json::from_str(json).unwrap(); + let resp: OpenAiChatResponse = serde_json::from_str(json).unwrap(); assert!(resp.choices.is_empty()); } @@ -855,4 +1125,104 @@ mod tests { let embedding = provider.embed("Hello world").await.unwrap(); assert!(!embedding.is_empty()); } + + #[test] + fn supports_tool_use_returns_true() { + assert!(test_provider().supports_tool_use()); + } + + #[test] + fn openai_tool_serialization() { + let tool = OpenAiTool { + r#type: "function", + function: OpenAiFunction { + name: "bash", + description: "Execute a shell command", + parameters: &serde_json::json!({ + "type": "object", + "properties": {"command": {"type": "string"}}, + "required": ["command"] + }), + }, + }; + let json = serde_json::to_string(&tool).unwrap(); + assert!(json.contains("\"type\":\"function\"")); + assert!(json.contains("\"name\":\"bash\"")); + assert!(json.contains("\"parameters\"")); + } + + #[test] + fn parse_tool_chat_response_with_tool_calls() { + let json = r#"{ + "choices": [{ + "message": { + "content": "I'll run that", + "tool_calls": [{ + "id": "call_123", + "type": "function", + "function": { + "name": "bash", + "arguments": "{\"command\":\"ls\"}" + } + }] + } + }] + }"#; + let resp: ToolChatResponse = serde_json::from_str(json).unwrap(); + assert_eq!(resp.choices.len(), 1); + let tc = resp.choices[0].message.tool_calls.as_ref().unwrap(); + assert_eq!(tc.len(), 1); + assert_eq!(tc[0].id, "call_123"); + assert_eq!(tc[0].function.name, "bash"); + } + + #[test] + fn parse_tool_chat_response_text_only() { + let json = r#"{"choices":[{"message":{"content":"Hello!"}}]}"#; + let resp: ToolChatResponse = serde_json::from_str(json).unwrap(); + assert!(resp.choices[0].message.tool_calls.is_none()); + } + + #[test] + fn convert_messages_structured_with_tool_parts() { + let messages = vec![ + Message::from_parts( + Role::Assistant, + vec![ + MessagePart::Text { + text: "Running command".into(), + }, + MessagePart::ToolUse { + id: "call_1".into(), + name: "bash".into(), + input: serde_json::json!({"command": "ls"}), + }, + ], + ), + Message::from_parts( + Role::User, + vec![MessagePart::ToolResult { + tool_use_id: "call_1".into(), + content: "file1.rs".into(), + is_error: false, + }], + ), + ]; + let result = convert_messages_structured(&messages); + assert_eq!(result.len(), 2); + assert_eq!(result[0].role, "assistant"); + assert!(result[0].tool_calls.is_some()); + assert_eq!(result[1].role, "tool"); + assert_eq!(result[1].tool_call_id.as_deref(), Some("call_1")); + } + + #[test] + fn convert_messages_structured_plain_messages() { + let messages = vec![Message::from_legacy(Role::User, "hello")]; + let result = convert_messages_structured(&messages); + assert_eq!(result.len(), 1); + assert_eq!(result[0].role, "user"); + assert_eq!(result[0].content, "hello"); + assert!(result[0].tool_calls.is_none()); + } } diff --git a/crates/zeph-llm/src/provider.rs b/crates/zeph-llm/src/provider.rs index 539fc910..68f93f3c 100644 --- a/crates/zeph-llm/src/provider.rs +++ b/crates/zeph-llm/src/provider.rs @@ -9,6 +9,38 @@ use crate::error::LlmError; /// Boxed stream of string chunks from an LLM provider. pub type ChatStream = Pin> + Send>>; +/// Minimal tool definition for LLM providers. +/// +/// Decoupled from `zeph-tools::ToolDef` to avoid cross-crate dependency. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ToolDefinition { + pub name: String, + pub description: String, + /// JSON Schema object describing parameters. + pub parameters: serde_json::Value, +} + +/// Structured tool invocation request from the model. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ToolUseRequest { + pub id: String, + pub name: String, + pub input: serde_json::Value, +} + +/// Response from `chat_with_tools()`. +#[derive(Debug, Clone)] +pub enum ChatResponse { + /// Model produced text output only. + Text(String), + /// Model requests one or more tool invocations. + ToolUse { + /// Any text the model emitted before/alongside tool calls. + text: Option, + tool_calls: Vec, + }, +} + /// Boxed future returning an embedding vector. pub type EmbedFuture = Pin, LlmError>> + Send>>; @@ -50,6 +82,17 @@ pub enum MessagePart { CrossSession { text: String, }, + ToolUse { + id: String, + name: String, + input: serde_json::Value, + }, + ToolResult { + tool_use_id: String, + content: String, + #[serde(default)] + is_error: bool, + }, } #[derive(Clone, Debug, Serialize, Deserialize)] @@ -113,6 +156,16 @@ impl Message { let _ = write!(out, "[tool output: {tool_name}]\n```\n{body}\n```"); } } + MessagePart::ToolUse { id, name, .. } => { + let _ = write!(out, "[tool_use: {name}({id})]"); + } + MessagePart::ToolResult { + tool_use_id, + content, + .. + } => { + let _ = write!(out, "[tool_result: {tool_use_id}]\n{content}"); + } } } out @@ -159,6 +212,30 @@ pub trait LlmProvider: Send + Sync { /// Provider name for logging and identification. fn name(&self) -> &'static str; + + /// Whether this provider supports native `tool_use` / function calling. + fn supports_tool_use(&self) -> bool { + false + } + + /// Send messages with tool definitions, returning a structured response. + /// + /// Default: falls back to `chat()` and wraps the result in `ChatResponse::Text`. + /// + /// # Errors + /// + /// Returns an error if the provider fails to communicate or the response is invalid. + fn chat_with_tools( + &self, + messages: &[Message], + tools: &[ToolDefinition], + ) -> impl Future> + Send { + async { + let _ = tools; + let text = self.chat(messages).await?; + Ok(ChatResponse::Text(text)) + } + } } #[cfg(test)] @@ -504,6 +581,132 @@ mod tests { assert!(!msg.content.contains("original")); } + #[test] + fn message_part_tool_use_serde_round_trip() { + let part = MessagePart::ToolUse { + id: "toolu_123".into(), + name: "bash".into(), + input: serde_json::json!({"command": "ls"}), + }; + let json = serde_json::to_string(&part).unwrap(); + let deserialized: MessagePart = serde_json::from_str(&json).unwrap(); + if let MessagePart::ToolUse { id, name, input } = deserialized { + assert_eq!(id, "toolu_123"); + assert_eq!(name, "bash"); + assert_eq!(input["command"], "ls"); + } else { + panic!("expected ToolUse"); + } + } + + #[test] + fn message_part_tool_result_serde_round_trip() { + let part = MessagePart::ToolResult { + tool_use_id: "toolu_123".into(), + content: "file1.rs\nfile2.rs".into(), + is_error: false, + }; + let json = serde_json::to_string(&part).unwrap(); + let deserialized: MessagePart = serde_json::from_str(&json).unwrap(); + if let MessagePart::ToolResult { + tool_use_id, + content, + is_error, + } = deserialized + { + assert_eq!(tool_use_id, "toolu_123"); + assert_eq!(content, "file1.rs\nfile2.rs"); + assert!(!is_error); + } else { + panic!("expected ToolResult"); + } + } + + #[test] + fn message_part_tool_result_is_error_default() { + let json = r#"{"kind":"tool_result","tool_use_id":"id","content":"err"}"#; + let part: MessagePart = serde_json::from_str(json).unwrap(); + if let MessagePart::ToolResult { is_error, .. } = part { + assert!(!is_error); + } else { + panic!("expected ToolResult"); + } + } + + #[test] + fn chat_response_construction() { + let text = ChatResponse::Text("hello".into()); + assert!(matches!(text, ChatResponse::Text(s) if s == "hello")); + + let tool_use = ChatResponse::ToolUse { + text: Some("I'll run that".into()), + tool_calls: vec![ToolUseRequest { + id: "1".into(), + name: "bash".into(), + input: serde_json::json!({}), + }], + }; + assert!(matches!(tool_use, ChatResponse::ToolUse { .. })); + } + + #[test] + fn flatten_parts_tool_use() { + let msg = Message::from_parts( + Role::Assistant, + vec![MessagePart::ToolUse { + id: "t1".into(), + name: "bash".into(), + input: serde_json::json!({"command": "ls"}), + }], + ); + assert!(msg.content.contains("[tool_use: bash(t1)]")); + } + + #[test] + fn flatten_parts_tool_result() { + let msg = Message::from_parts( + Role::User, + vec![MessagePart::ToolResult { + tool_use_id: "t1".into(), + content: "output here".into(), + is_error: false, + }], + ); + assert!(msg.content.contains("[tool_result: t1]")); + assert!(msg.content.contains("output here")); + } + + #[test] + fn tool_definition_serde_round_trip() { + let def = ToolDefinition { + name: "bash".into(), + description: "Execute a shell command".into(), + parameters: serde_json::json!({"type": "object"}), + }; + let json = serde_json::to_string(&def).unwrap(); + let deserialized: ToolDefinition = serde_json::from_str(&json).unwrap(); + assert_eq!(deserialized.name, "bash"); + assert_eq!(deserialized.description, "Execute a shell command"); + } + + #[tokio::test] + async fn supports_tool_use_default_returns_false() { + let provider = StubProvider { + response: String::new(), + }; + assert!(!provider.supports_tool_use()); + } + + #[tokio::test] + async fn chat_with_tools_default_delegates_to_chat() { + let provider = StubProvider { + response: "hello".into(), + }; + let messages = vec![Message::from_legacy(Role::User, "test")]; + let result = provider.chat_with_tools(&messages, &[]).await.unwrap(); + assert!(matches!(result, ChatResponse::Text(s) if s == "hello")); + } + #[test] fn tool_output_compacted_at_serde_default() { let json = r#"{"kind":"tool_output","tool_name":"bash","body":"out"}"#; From fb7e96481f9522774fb5f1f4945d1f45825bb831 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sun, 15 Feb 2026 19:23:23 +0100 Subject: [PATCH 02/11] docs: update CHANGELOG for M20 native tool_use --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59203f41..bf3a3e40 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +### Added +- Native tool_use support for Claude provider (Anthropic API format) (#256) +- Native function calling support for OpenAI provider (#257) +- `ToolDefinition`, `ChatResponse`, `ToolUseRequest` types in zeph-llm (#254) +- `ToolUse`/`ToolResult` variants in `MessagePart` for structured tool flow (#255) +- Dual-mode agent loop: native structured path alongside legacy text extraction (#258) + ## [0.9.7] - 2026-02-15 ### Performance From 601a892d96e5d238d4b1fe2162dde5fc7b3440af Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sun, 15 Feb 2026 19:38:29 +0100 Subject: [PATCH 03/11] fix: delegate supports_tool_use and chat_with_tools through orchestrator SubProvider and ModelOrchestrator were missing delegation for the new trait methods, causing the agent loop to always take the legacy text extraction path even when the underlying provider supports native tool_use. Add debug logging for path selection. --- crates/zeph-core/src/agent/streaming.rs | 2 ++ crates/zeph-llm/src/orchestrator/mod.rs | 20 ++++++++++++++- crates/zeph-llm/src/orchestrator/router.rs | 30 +++++++++++++++++++++- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/crates/zeph-core/src/agent/streaming.rs b/crates/zeph-core/src/agent/streaming.rs index 26dce2b2..f439a5a8 100644 --- a/crates/zeph-core/src/agent/streaming.rs +++ b/crates/zeph-core/src/agent/streaming.rs @@ -11,9 +11,11 @@ use super::{Agent, DOOM_LOOP_WINDOW, format_tool_output}; impl Agent { pub(crate) async fn process_response(&mut self) -> Result<(), super::error::AgentError> { if self.provider.supports_tool_use() { + tracing::debug!(provider = self.provider.name(), "using native tool_use path"); return self.process_response_native_tools().await; } + tracing::debug!(provider = self.provider.name(), "using legacy text extraction path"); self.doom_loop_history.clear(); for iteration in 0..self.max_tool_iterations { diff --git a/crates/zeph-llm/src/orchestrator/mod.rs b/crates/zeph-llm/src/orchestrator/mod.rs index ec7ab225..56ad5ce1 100644 --- a/crates/zeph-llm/src/orchestrator/mod.rs +++ b/crates/zeph-llm/src/orchestrator/mod.rs @@ -7,7 +7,7 @@ pub use router::SubProvider; use std::collections::HashMap; use crate::error::LlmError; -use crate::provider::{ChatStream, LlmProvider, Message, StatusTx}; +use crate::provider::{ChatResponse, ChatStream, LlmProvider, Message, StatusTx, ToolDefinition}; #[derive(Debug, Clone)] pub struct ModelOrchestrator { @@ -208,6 +208,24 @@ impl LlmProvider for ModelOrchestrator { .is_some_and(LlmProvider::supports_embeddings) } + fn supports_tool_use(&self) -> bool { + self.providers + .get(&self.default_provider) + .is_some_and(LlmProvider::supports_tool_use) + } + + async fn chat_with_tools( + &self, + messages: &[Message], + tools: &[ToolDefinition], + ) -> Result { + let provider = self + .providers + .get(&self.default_provider) + .ok_or(LlmError::NoProviders)?; + provider.chat_with_tools(messages, tools).await + } + fn name(&self) -> &'static str { "orchestrator" } diff --git a/crates/zeph-llm/src/orchestrator/router.rs b/crates/zeph-llm/src/orchestrator/router.rs index 5b9e1d71..361d227a 100644 --- a/crates/zeph-llm/src/orchestrator/router.rs +++ b/crates/zeph-llm/src/orchestrator/router.rs @@ -6,7 +6,9 @@ use crate::claude::ClaudeProvider; use crate::ollama::OllamaProvider; #[cfg(feature = "openai")] use crate::openai::OpenAiProvider; -use crate::provider::{ChatStream, LlmProvider, Message, StatusTx}; +use crate::provider::{ + ChatResponse, ChatStream, LlmProvider, Message, StatusTx, ToolDefinition, +}; /// Inner provider enum without the Orchestrator variant to break recursive type cycles. #[derive(Debug, Clone)] @@ -92,6 +94,32 @@ impl LlmProvider for SubProvider { } } + fn supports_tool_use(&self) -> bool { + match self { + Self::Ollama(p) => p.supports_tool_use(), + Self::Claude(p) => p.supports_tool_use(), + #[cfg(feature = "openai")] + Self::OpenAi(p) => p.supports_tool_use(), + #[cfg(feature = "candle")] + Self::Candle(p) => p.supports_tool_use(), + } + } + + async fn chat_with_tools( + &self, + messages: &[Message], + tools: &[ToolDefinition], + ) -> Result { + match self { + Self::Ollama(p) => p.chat_with_tools(messages, tools).await, + Self::Claude(p) => p.chat_with_tools(messages, tools).await, + #[cfg(feature = "openai")] + Self::OpenAi(p) => p.chat_with_tools(messages, tools).await, + #[cfg(feature = "candle")] + Self::Candle(p) => p.chat_with_tools(messages, tools).await, + } + } + fn name(&self) -> &'static str { match self { Self::Ollama(p) => p.name(), From ff59aa616491110b7fd26cf9991ea70cfd29ffee Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sun, 15 Feb 2026 21:02:07 +0100 Subject: [PATCH 04/11] fix: implement execute_tool_call for ShellExecutor ShellExecutor was missing execute_tool_call() implementation, inheriting the default that returns None. Native tool_use calls to bash were silently producing no output. Extract command from structured params and delegate to existing execute_inner pipeline. --- crates/zeph-tools/src/shell.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/crates/zeph-tools/src/shell.rs b/crates/zeph-tools/src/shell.rs index 38baa1ea..6a1d21ea 100644 --- a/crates/zeph-tools/src/shell.rs +++ b/crates/zeph-tools/src/shell.rs @@ -7,7 +7,7 @@ use schemars::JsonSchema; use crate::audit::{AuditEntry, AuditLogger, AuditResult}; use crate::config::ShellConfig; -use crate::executor::{ToolError, ToolEvent, ToolEventTx, ToolExecutor, ToolOutput}; +use crate::executor::{ToolCall, ToolError, ToolEvent, ToolEventTx, ToolExecutor, ToolOutput}; use crate::permissions::{PermissionAction, PermissionPolicy}; const DEFAULT_BLOCKED: &[&str] = &[ @@ -289,6 +289,26 @@ impl ToolExecutor for ShellExecutor { invocation: InvocationHint::FencedBlock("bash"), }] } + + async fn execute_tool_call( + &self, + call: &ToolCall, + ) -> Result, ToolError> { + if call.tool_id != "bash" { + return Ok(None); + } + let command = call + .params + .get("command") + .and_then(|v| v.as_str()) + .unwrap_or_default(); + if command.is_empty() { + return Ok(None); + } + // Wrap as a fenced block so execute_inner can extract and run it + let synthetic = format!("```bash\n{command}\n```"); + self.execute_inner(&synthetic, false).await + } } fn extract_paths(code: &str) -> Vec<&str> { From 11fb99a9f5a8f0053f83137d3d0cedb32fcd75ec Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sun, 15 Feb 2026 21:25:32 +0100 Subject: [PATCH 05/11] fix: skip text-based tool instructions when using native tool_use When the provider supports native tool_use, the tools are passed via the API tools parameter. The text-based tool catalog in the system prompt was causing Claude to respond with fenced code blocks instead of structured tool_use calls. Skip prompt-based tool instructions for native tool_use providers. --- crates/zeph-core/src/agent/context.rs | 5 ++++- crates/zeph-core/src/agent/streaming.rs | 5 ++++- crates/zeph-llm/src/claude.rs | 6 +++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/crates/zeph-core/src/agent/context.rs b/crates/zeph-core/src/agent/context.rs index 3b2c111e..d09986f6 100644 --- a/crates/zeph-core/src/agent/context.rs +++ b/crates/zeph-core/src/agent/context.rs @@ -649,7 +649,10 @@ impl Agent Agent Date: Sun, 15 Feb 2026 22:03:46 +0100 Subject: [PATCH 06/11] fix: use async fn for chat_with_tools trait method The trait default using fn -> impl Future (RPITIT) with a body was not being overridden by async fn implementations in provider structs, causing the default fallback to chat() to always be called. Change the trait definition to async fn which correctly dispatches to overrides. --- crates/zeph-core/src/agent/streaming.rs | 12 ++++++++++++ crates/zeph-llm/src/any.rs | 5 +++++ crates/zeph-llm/src/orchestrator/mod.rs | 6 ++++++ crates/zeph-llm/src/provider.rs | 13 ++++++------- 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/crates/zeph-core/src/agent/streaming.rs b/crates/zeph-core/src/agent/streaming.rs index 8e2db7b6..0925ef7a 100644 --- a/crates/zeph-core/src/agent/streaming.rs +++ b/crates/zeph-core/src/agent/streaming.rs @@ -364,6 +364,12 @@ impl Agent>(), + "native tool_use: collected tool definitions" + ); + for iteration in 0..self.max_tool_iterations { self.channel.send_typing().await?; @@ -428,6 +434,12 @@ impl Agent Result, super::error::AgentError> { + tracing::warn!( + tool_count = tool_defs.len(), + provider_name = self.provider.name(), + supports = self.provider.supports_tool_use(), + "call_chat_with_tools ENTERED" + ); let llm_timeout = std::time::Duration::from_secs(self.timeouts.llm_seconds); let start = std::time::Instant::now(); diff --git a/crates/zeph-llm/src/any.rs b/crates/zeph-llm/src/any.rs index f01b3d73..40a0450c 100644 --- a/crates/zeph-llm/src/any.rs +++ b/crates/zeph-llm/src/any.rs @@ -96,6 +96,11 @@ impl LlmProvider for AnyProvider { messages: &[Message], tools: &[ToolDefinition], ) -> Result { + tracing::warn!( + provider = self.name(), + tool_count = tools.len(), + "AnyProvider::chat_with_tools called" + ); delegate_provider!(self, |p| p.chat_with_tools(messages, tools).await) } } diff --git a/crates/zeph-llm/src/orchestrator/mod.rs b/crates/zeph-llm/src/orchestrator/mod.rs index 56ad5ce1..f6f70698 100644 --- a/crates/zeph-llm/src/orchestrator/mod.rs +++ b/crates/zeph-llm/src/orchestrator/mod.rs @@ -223,6 +223,12 @@ impl LlmProvider for ModelOrchestrator { .providers .get(&self.default_provider) .ok_or(LlmError::NoProviders)?; + tracing::debug!( + default_provider = %self.default_provider, + tool_count = tools.len(), + provider_supports_tool_use = provider.supports_tool_use(), + "orchestrator delegating chat_with_tools" + ); provider.chat_with_tools(messages, tools).await } diff --git a/crates/zeph-llm/src/provider.rs b/crates/zeph-llm/src/provider.rs index 68f93f3c..97b7b3fa 100644 --- a/crates/zeph-llm/src/provider.rs +++ b/crates/zeph-llm/src/provider.rs @@ -225,16 +225,15 @@ pub trait LlmProvider: Send + Sync { /// # Errors /// /// Returns an error if the provider fails to communicate or the response is invalid. - fn chat_with_tools( + #[allow(async_fn_in_trait)] + async fn chat_with_tools( &self, messages: &[Message], tools: &[ToolDefinition], - ) -> impl Future> + Send { - async { - let _ = tools; - let text = self.chat(messages).await?; - Ok(ChatResponse::Text(text)) - } + ) -> Result { + let _ = tools; + let text = self.chat(messages).await?; + Ok(ChatResponse::Text(text)) } } From 56913b38d1293152247ebe896e11bdba9e4d1d05 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sun, 15 Feb 2026 22:12:13 +0100 Subject: [PATCH 07/11] fix: simplify chat_with_tools default body in LlmProvider trait --- crates/zeph-llm/src/provider.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/crates/zeph-llm/src/provider.rs b/crates/zeph-llm/src/provider.rs index 97b7b3fa..b6d405d2 100644 --- a/crates/zeph-llm/src/provider.rs +++ b/crates/zeph-llm/src/provider.rs @@ -229,11 +229,9 @@ pub trait LlmProvider: Send + Sync { async fn chat_with_tools( &self, messages: &[Message], - tools: &[ToolDefinition], + _tools: &[ToolDefinition], ) -> Result { - let _ = tools; - let text = self.chat(messages).await?; - Ok(ChatResponse::Text(text)) + Ok(ChatResponse::Text(self.chat(messages).await?)) } } From a1bce289f73e25f8d49611219606a25bc1e23299 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sun, 15 Feb 2026 22:18:31 +0100 Subject: [PATCH 08/11] fix: replace fenced-block instructions with native tool_use prompt for capable providers Split BASE_PROMPT into header/legacy/native/tail sections. When provider supports tool_use, system prompt instructs to use structured tool mechanism instead of fenced code blocks. This was the root cause of Claude returning text with backtick blocks despite tools being passed via API. --- crates/zeph-core/src/agent/context.rs | 7 ++++- crates/zeph-core/src/agent/mod.rs | 4 +-- crates/zeph-core/src/context.rs | 45 ++++++++++++++++++--------- 3 files changed, 39 insertions(+), 17 deletions(-) diff --git a/crates/zeph-core/src/agent/context.rs b/crates/zeph-core/src/agent/context.rs index d09986f6..15499831 100644 --- a/crates/zeph-core/src/agent/context.rs +++ b/crates/zeph-core/src/agent/context.rs @@ -663,7 +663,12 @@ impl Agent Agent Agenttest", None, None); + let prompt = build_system_prompt("test", None, None, false); assert!(prompt.contains("You are Zeph")); assert!(prompt.contains("")); } @@ -319,7 +336,7 @@ mod tests { os: "linux".into(), model_name: "test".into(), }; - let prompt = build_system_prompt("skills here", Some(&env), None); + let prompt = build_system_prompt("skills here", Some(&env), None, false); assert!(prompt.contains("You are Zeph")); assert!(prompt.contains("")); assert!(prompt.contains("skills here")); @@ -327,7 +344,7 @@ mod tests { #[test] fn build_system_prompt_without_env() { - let prompt = build_system_prompt("skills here", None, None); + let prompt = build_system_prompt("skills here", None, None, false); assert!(prompt.contains("You are Zeph")); assert!(!prompt.contains("")); assert!(prompt.contains("skills here")); @@ -335,7 +352,7 @@ mod tests { #[test] fn base_prompt_contains_guidelines() { - let prompt = build_system_prompt("", None, None); + let prompt = build_system_prompt("", None, None, false); assert!(prompt.contains("## Tool Use")); assert!(prompt.contains("## Guidelines")); assert!(prompt.contains("## Security")); From ace50d7eaeba73556244814580df98b1fd48d89f Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sun, 15 Feb 2026 22:23:14 +0100 Subject: [PATCH 09/11] chore: demote debug WARN logs to DEBUG in tool_use dispatch chain --- crates/zeph-core/src/agent/streaming.rs | 5 ++--- crates/zeph-llm/src/any.rs | 5 ----- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/crates/zeph-core/src/agent/streaming.rs b/crates/zeph-core/src/agent/streaming.rs index 0925ef7a..acb6ec71 100644 --- a/crates/zeph-core/src/agent/streaming.rs +++ b/crates/zeph-core/src/agent/streaming.rs @@ -434,11 +434,10 @@ impl Agent Result, super::error::AgentError> { - tracing::warn!( + tracing::debug!( tool_count = tool_defs.len(), provider_name = self.provider.name(), - supports = self.provider.supports_tool_use(), - "call_chat_with_tools ENTERED" + "call_chat_with_tools" ); let llm_timeout = std::time::Duration::from_secs(self.timeouts.llm_seconds); let start = std::time::Instant::now(); diff --git a/crates/zeph-llm/src/any.rs b/crates/zeph-llm/src/any.rs index 40a0450c..f01b3d73 100644 --- a/crates/zeph-llm/src/any.rs +++ b/crates/zeph-llm/src/any.rs @@ -96,11 +96,6 @@ impl LlmProvider for AnyProvider { messages: &[Message], tools: &[ToolDefinition], ) -> Result { - tracing::warn!( - provider = self.name(), - tool_count = tools.len(), - "AnyProvider::chat_with_tools called" - ); delegate_provider!(self, |p| p.chat_with_tools(messages, tools).await) } } From bd565c0a81cade4fbd5f5f50f793a56bef631fbc Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sun, 15 Feb 2026 22:27:18 +0100 Subject: [PATCH 10/11] docs: add native tool_use documentation and update changelog Add Native Tool Use section to tools guide describing the structured API-level tool calling mechanism for Claude and OpenAI providers. Rename existing section to Legacy Text Extraction. Update README with native tool use feature entry and architecture description. Add system prompt split entry to changelog. --- CHANGELOG.md | 1 + README.md | 3 ++- docs/src/guide/tools.md | 19 +++++++++++++++++-- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf3a3e40..f665f53d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - `ToolDefinition`, `ChatResponse`, `ToolUseRequest` types in zeph-llm (#254) - `ToolUse`/`ToolResult` variants in `MessagePart` for structured tool flow (#255) - Dual-mode agent loop: native structured path alongside legacy text extraction (#258) +- Dual system prompt: native tool_use instructions for capable providers, fenced-block instructions for legacy providers ## [0.9.7] - 2026-02-15 diff --git a/README.md b/README.md index 6c95c363..049ded96 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,7 @@ cargo build --release --features tui | Feature | Description | Docs | |---------|-------------|------| +| **Native Tool Use** | Structured tool calling via Claude tool_use and OpenAI function calling APIs; automatic fallback to text extraction for local models | [Tools](https://bug-ops.github.io/zeph/guide/tools.html) | | **Hybrid Inference** | Ollama, Claude, OpenAI, Candle (GGUF) — local, cloud, or both | [OpenAI](https://bug-ops.github.io/zeph/guide/openai.html) · [Candle](https://bug-ops.github.io/zeph/guide/candle.html) | | **Skills-First Architecture** | Embedding-based top-K matching, progressive loading, hot-reload | [Skills](https://bug-ops.github.io/zeph/guide/skills.html) | | **Code Indexing** | AST-based chunking (tree-sitter), semantic retrieval, repo map generation, incremental indexing | [Code Indexing](https://bug-ops.github.io/zeph/guide/code-indexing.html) | @@ -109,7 +110,7 @@ zeph (binary) — bootstrap, AnyChannel dispatch, vault resolution (anyhow for t ├── zeph-core — Agent split into 7 submodules (context, streaming, persistence, │ learning, mcp, index), typed AgentError/ChannelError, config hot-reload ├── zeph-llm — LlmProvider: Ollama, Claude, OpenAI, Candle, orchestrator, -│ typed LlmError, EmbedFuture/EmbedFn type aliases +│ native tool_use (Claude/OpenAI), typed LlmError ├── zeph-skills — SKILL.md parser, embedding matcher, hot-reload, self-learning, typed SkillError ├── zeph-memory — SQLite + Qdrant, semantic recall, summarization, typed MemoryError ├── zeph-index — AST-based code indexing, semantic retrieval, repo map (optional) diff --git a/docs/src/guide/tools.md b/docs/src/guide/tools.md index 05fb1221..4738a9f1 100644 --- a/docs/src/guide/tools.md +++ b/docs/src/guide/tools.md @@ -27,9 +27,24 @@ Each tool executor declares its definitions via `tool_definitions()`. On every L See [Security](../security.md#file-executor-sandbox) for details on the path validation mechanism. -## Dual-Mode Execution +## Native Tool Use -The agent loop supports two tool invocation modes, distinguished by `InvocationHint` on each `ToolDef`: +Providers that support structured tool calling (Claude, OpenAI) use the native API-level tool mechanism instead of text-based fenced blocks. The agent detects this via `LlmProvider::supports_tool_use()` and switches to the native path automatically. + +In native mode: + +- Tool definitions (name, description, JSON Schema parameters) are passed to the LLM API alongside the messages. +- The LLM returns structured `tool_use` content blocks with typed parameters. +- The agent executes each tool call and sends results back as `tool_result` messages. +- The system prompt instructs the LLM to use the structured mechanism, not fenced code blocks. + +The native path uses the same tool executors and permission checks as the legacy path. The only difference is how tools are invoked and results are returned — structured JSON instead of text parsing. + +Types involved: `ToolDefinition` (name + description + JSON Schema), `ChatResponse` (Text or ToolUse), `ToolUseRequest` (id + name + input), and `ToolUse`/`ToolResult` variants in `MessagePart`. + +## Legacy Text Extraction + +Providers without native tool support (Ollama, Candle) use text-based tool invocation, distinguished by `InvocationHint` on each `ToolDef`: 1. **Fenced block** (`InvocationHint::FencedBlock("bash")` / `FencedBlock("scrape")`) — the LLM emits a fenced code block with the specified tag. `ShellExecutor` handles ` ```bash ` blocks, `WebScrapeExecutor` handles ` ```scrape ` blocks containing JSON with CSS selectors. 2. **Structured tool call** (`InvocationHint::ToolCall`) — the LLM emits a `ToolCall` with `tool_id` and typed `params`. `CompositeExecutor` routes the call to `FileExecutor` for file tools. From d3b01031cbf9a5ee20cf47d22695f9567a5b514b Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Sun, 15 Feb 2026 22:28:27 +0100 Subject: [PATCH 11/11] style: apply rustfmt formatting --- crates/zeph-core/src/agent/context.rs | 13 ++++++------- crates/zeph-core/src/agent/streaming.rs | 10 ++++++++-- crates/zeph-core/src/context.rs | 7 ++++++- crates/zeph-llm/src/orchestrator/router.rs | 4 +--- crates/zeph-tools/src/shell.rs | 5 +---- 5 files changed, 22 insertions(+), 17 deletions(-) diff --git a/crates/zeph-core/src/agent/context.rs b/crates/zeph-core/src/agent/context.rs index 15499831..bef1a675 100644 --- a/crates/zeph-core/src/agent/context.rs +++ b/crates/zeph-core/src/agent/context.rs @@ -662,13 +662,12 @@ impl Agent Agent { pub(crate) async fn process_response(&mut self) -> Result<(), super::error::AgentError> { if self.provider.supports_tool_use() { - tracing::debug!(provider = self.provider.name(), "using native tool_use path"); + tracing::debug!( + provider = self.provider.name(), + "using native tool_use path" + ); return self.process_response_native_tools().await; } - tracing::debug!(provider = self.provider.name(), "using legacy text extraction path"); + tracing::debug!( + provider = self.provider.name(), + "using legacy text extraction path" + ); self.doom_loop_history.clear(); for iteration in 0..self.max_tool_iterations { diff --git a/crates/zeph-core/src/context.rs b/crates/zeph-core/src/context.rs index 1ad7ba82..401665f0 100644 --- a/crates/zeph-core/src/context.rs +++ b/crates/zeph-core/src/context.rs @@ -222,7 +222,12 @@ mod tests { #[test] fn with_skills() { - let prompt = build_system_prompt("test", None, None, false); + let prompt = build_system_prompt( + "test", + None, + None, + false, + ); assert!(prompt.contains("You are Zeph")); assert!(prompt.contains("")); } diff --git a/crates/zeph-llm/src/orchestrator/router.rs b/crates/zeph-llm/src/orchestrator/router.rs index 361d227a..64b83fad 100644 --- a/crates/zeph-llm/src/orchestrator/router.rs +++ b/crates/zeph-llm/src/orchestrator/router.rs @@ -6,9 +6,7 @@ use crate::claude::ClaudeProvider; use crate::ollama::OllamaProvider; #[cfg(feature = "openai")] use crate::openai::OpenAiProvider; -use crate::provider::{ - ChatResponse, ChatStream, LlmProvider, Message, StatusTx, ToolDefinition, -}; +use crate::provider::{ChatResponse, ChatStream, LlmProvider, Message, StatusTx, ToolDefinition}; /// Inner provider enum without the Orchestrator variant to break recursive type cycles. #[derive(Debug, Clone)] diff --git a/crates/zeph-tools/src/shell.rs b/crates/zeph-tools/src/shell.rs index 6a1d21ea..d5223635 100644 --- a/crates/zeph-tools/src/shell.rs +++ b/crates/zeph-tools/src/shell.rs @@ -290,10 +290,7 @@ impl ToolExecutor for ShellExecutor { }] } - async fn execute_tool_call( - &self, - call: &ToolCall, - ) -> Result, ToolError> { + async fn execute_tool_call(&self, call: &ToolCall) -> Result, ToolError> { if call.tool_id != "bash" { return Ok(None); }