8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [Unreleased]

### Added
- Native tool_use support for Claude provider (Anthropic API format) (#256)
- Native function calling support for OpenAI provider (#257)
- `ToolDefinition`, `ChatResponse`, `ToolUseRequest` types in zeph-llm (#254)
- `ToolUse`/`ToolResult` variants in `MessagePart` for structured tool flow (#255)
- Dual-mode agent loop: native structured path alongside legacy text extraction (#258)
- Dual system prompt: native tool_use instructions for capable providers, fenced-block instructions for legacy providers

## [0.9.7] - 2026-02-15

### Performance
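For orientation, the new zeph-llm types listed under Added can be sketched from how the code in this PR uses them. The field and variant names below are taken from the call sites in `crates/zeph-core/src/agent/streaming.rs`, not from the zeph-llm definitions themselves, so treat this as an approximation rather than the actual crate source.

```rust
// Sketch of the zeph-llm tool-use types, inferred from their usage in this diff.
use serde_json::Value;

/// A tool exposed to the provider (#254).
pub struct ToolDefinition {
    pub name: String,
    pub description: String,
    /// JSON Schema describing the tool's parameters.
    pub parameters: Value,
}

/// A single tool invocation requested by the model (#254).
pub struct ToolUseRequest {
    pub id: String,
    pub name: String,
    pub input: Value,
}

/// Provider reply for the native tool-use path (#254).
pub enum ChatResponse {
    /// Plain assistant text — no tool calls requested.
    Text(String),
    /// One or more requested tool calls, optionally preceded by text.
    ToolUse {
        text: Option<String>,
        tool_calls: Vec<ToolUseRequest>,
    },
}

/// Structured message parts for the tool flow (#255).
pub enum MessagePart {
    Text { text: String },
    ToolUse { id: String, name: String, input: Value },
    ToolResult { tool_use_id: String, content: String, is_error: bool },
}
```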
3 changes: 2 additions & 1 deletion README.md
@@ -88,6 +88,7 @@ cargo build --release --features tui

| Feature | Description | Docs |
|---------|-------------|------|
| **Native Tool Use** | Structured tool calling via Claude tool_use and OpenAI function calling APIs; automatic fallback to text extraction for local models | [Tools](https://bug-ops.github.io/zeph/guide/tools.html) |
| **Hybrid Inference** | Ollama, Claude, OpenAI, Candle (GGUF) — local, cloud, or both | [OpenAI](https://bug-ops.github.io/zeph/guide/openai.html) · [Candle](https://bug-ops.github.io/zeph/guide/candle.html) |
| **Skills-First Architecture** | Embedding-based top-K matching, progressive loading, hot-reload | [Skills](https://bug-ops.github.io/zeph/guide/skills.html) |
| **Code Indexing** | AST-based chunking (tree-sitter), semantic retrieval, repo map generation, incremental indexing | [Code Indexing](https://bug-ops.github.io/zeph/guide/code-indexing.html) |
@@ -109,7 +110,7 @@ zeph (binary) — bootstrap, AnyChannel dispatch, vault resolution (anyhow for t
├── zeph-core — Agent split into 7 submodules (context, streaming, persistence,
│ learning, mcp, index), typed AgentError/ChannelError, config hot-reload
├── zeph-llm — LlmProvider: Ollama, Claude, OpenAI, Candle, orchestrator,
typed LlmError, EmbedFuture/EmbedFn type aliases
native tool_use (Claude/OpenAI), typed LlmError
├── zeph-skills — SKILL.md parser, embedding matcher, hot-reload, self-learning, typed SkillError
├── zeph-memory — SQLite + Qdrant, semantic recall, summarization, typed MemoryError
├── zeph-index — AST-based code indexing, semantic retrieval, repo map (optional)
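The README rows above describe the provider-side contract only indirectly. A minimal sketch of how the `LlmProvider` trait appears to be extended, based solely on the call sites in `agent/context.rs` and `agent/streaming.rs`: the method names are confirmed by this diff, while the async style, default implementation, and error type are assumptions (the README only says zeph-llm exposes a typed `LlmError`).

```rust
// Sketch only — not the actual zeph-llm trait definition.
use zeph_llm::provider::{ChatResponse, Message, ToolDefinition};
use zeph_llm::LlmError; // exact path assumed

pub trait LlmProvider {
    fn name(&self) -> &str;

    /// Providers with native structured tool calling (Claude tool_use,
    /// OpenAI function calling) return true; local/legacy providers keep
    /// the default and fall back to prompt-based text extraction.
    fn supports_tool_use(&self) -> bool {
        false
    }

    /// Send the conversation plus tool definitions; the provider answers
    /// with plain text or with one or more requested tool calls.
    async fn chat_with_tools(
        &self,
        messages: &[Message],
        tools: &[ToolDefinition],
    ) -> Result<ChatResponse, LlmError>;

    // ...existing chat / streaming / embedding methods elided...
}
```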
13 changes: 10 additions & 3 deletions crates/zeph-core/src/agent/context.rs
@@ -649,7 +649,10 @@ impl<P: LlmProvider + Clone + 'static, C: Channel, T: ToolExecutor> Agent<P, C,
.last_skills_prompt
.clone_from(&skills_prompt);
let env = EnvironmentContext::gather(&self.model_name);
let tool_catalog = {
let tool_catalog = if self.provider.supports_tool_use() {
// Native tool_use: tools are passed via API, skip prompt-based instructions
None
} else {
let defs = self.tool_executor.tool_definitions();
if defs.is_empty() {
None
@@ -659,8 +662,12 @@
}
};
#[allow(unused_mut)]
let mut system_prompt =
build_system_prompt(&skills_prompt, Some(&env), tool_catalog.as_deref());
let mut system_prompt = build_system_prompt(
&skills_prompt,
Some(&env),
tool_catalog.as_deref(),
self.provider.supports_tool_use(),
);

if !catalog_prompt.is_empty() {
system_prompt.push_str("\n\n");
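The new fourth argument to `build_system_prompt` carries the provider's tool-use capability into prompt construction. A sketch of the updated signature implied by these call sites — the parameter name `native_tool_use` is a guess; the diff only shows that `provider.supports_tool_use()` (or a literal `false`) is now passed in that position:

```rust
// Sketch of the signature implied by the call sites; body elided.
pub fn build_system_prompt(
    skills_prompt: &str,
    env: Option<&EnvironmentContext>,
    tool_catalog: Option<&str>,
    native_tool_use: bool,
) -> String {
    // When native_tool_use is true, the fenced-block tool instructions are
    // presumably omitted, since tools are passed structurally via the API;
    // otherwise the legacy text-extraction instructions are appended.
    todo!()
}
```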
4 changes: 2 additions & 2 deletions crates/zeph-core/src/agent/mod.rs
@@ -142,7 +142,7 @@ impl<P: LlmProvider + Clone + 'static, C: Channel, T: ToolExecutor> Agent<P, C,
.filter_map(|m| registry.get_skill(&m.name).ok())
.collect();
let skills_prompt = format_skills_prompt(&all_skills, std::env::consts::OS);
let system_prompt = build_system_prompt(&skills_prompt, None, None);
let system_prompt = build_system_prompt(&skills_prompt, None, None, false);
tracing::debug!(len = system_prompt.len(), "initial system prompt built");
tracing::trace!(prompt = %system_prompt, "full system prompt");

@@ -724,7 +724,7 @@ impl<P: LlmProvider + Clone + 'static, C: Channel, T: ToolExecutor> Agent<P, C,
self.skill_state
.last_skills_prompt
.clone_from(&skills_prompt);
let system_prompt = build_system_prompt(&skills_prompt, None, None);
let system_prompt = build_system_prompt(&skills_prompt, None, None, false);
if let Some(msg) = self.messages.first_mut() {
msg.content = system_prompt;
}
229 changes: 227 additions & 2 deletions crates/zeph-core/src/agent/streaming.rs
@@ -1,6 +1,6 @@
use tokio_stream::StreamExt;
use zeph_llm::provider::{LlmProvider, Message, MessagePart, Role};
use zeph_tools::executor::{ToolError, ToolExecutor, ToolOutput};
use zeph_llm::provider::{ChatResponse, LlmProvider, Message, MessagePart, Role, ToolDefinition};
use zeph_tools::executor::{ToolCall, ToolError, ToolExecutor, ToolOutput};

use crate::channel::Channel;
use crate::redact::redact_secrets;
@@ -10,6 +10,18 @@ use super::{Agent, DOOM_LOOP_WINDOW, format_tool_output};

impl<P: LlmProvider + Clone + 'static, C: Channel, T: ToolExecutor> Agent<P, C, T> {
pub(crate) async fn process_response(&mut self) -> Result<(), super::error::AgentError> {
if self.provider.supports_tool_use() {
tracing::debug!(
provider = self.provider.name(),
"using native tool_use path"
);
return self.process_response_native_tools().await;
}

tracing::debug!(
provider = self.provider.name(),
"using legacy text extraction path"
);
self.doom_loop_history.clear();

for iteration in 0..self.max_tool_iterations {
@@ -347,4 +359,217 @@ impl<P: LlmProvider + Clone + 'static, C: Channel, T: ToolExecutor> Agent<P, C,
std::borrow::Cow::Borrowed(text)
}
}

async fn process_response_native_tools(&mut self) -> Result<(), super::error::AgentError> {
self.doom_loop_history.clear();

let tool_defs: Vec<ToolDefinition> = self
.tool_executor
.tool_definitions()
.iter()
.map(tool_def_to_definition)
.collect();

tracing::debug!(
tool_count = tool_defs.len(),
tools = ?tool_defs.iter().map(|t| &t.name).collect::<Vec<_>>(),
"native tool_use: collected tool definitions"
);

for iteration in 0..self.max_tool_iterations {
self.channel.send_typing().await?;

if let Some(ref budget) = self.context_state.budget {
let used: usize = self
.messages
.iter()
.map(|m| estimate_tokens(&m.content))
.sum();
let threshold = budget.max_tokens() * 4 / 5;
if used >= threshold {
tracing::warn!(
iteration,
used,
threshold,
"stopping tool loop: context budget nearing limit"
);
self.channel
.send("Stopping: context window is nearly full.")
.await?;
break;
}
}

let chat_result = self.call_chat_with_tools(&tool_defs).await?;

let Some(chat_result) = chat_result else {
tracing::debug!("chat_with_tools returned None (timeout)");
return Ok(());
};

tracing::debug!(iteration, ?chat_result, "native tool loop iteration");

// Text → display and return
if let ChatResponse::Text(text) = &chat_result {
if !text.is_empty() {
let display = self.maybe_redact(text);
self.channel.send(&display).await?;
}
self.messages
.push(Message::from_legacy(Role::Assistant, text.as_str()));
self.persist_message(Role::Assistant, text).await;
return Ok(());
}

// ToolUse → execute tools and loop
let ChatResponse::ToolUse { text, tool_calls } = chat_result else {
unreachable!();
};
self.handle_native_tool_calls(text.as_deref(), &tool_calls)
.await?;

if self.check_doom_loop(iteration).await? {
break;
}
}

Ok(())
}

async fn call_chat_with_tools(
&mut self,
tool_defs: &[ToolDefinition],
) -> Result<Option<ChatResponse>, super::error::AgentError> {
tracing::debug!(
tool_count = tool_defs.len(),
provider_name = self.provider.name(),
"call_chat_with_tools"
);
let llm_timeout = std::time::Duration::from_secs(self.timeouts.llm_seconds);
let start = std::time::Instant::now();

let result = if let Ok(result) = tokio::time::timeout(
llm_timeout,
self.provider.chat_with_tools(&self.messages, tool_defs),
)
.await
{
result?
} else {
self.channel
.send("LLM request timed out. Please try again.")
.await?;
return Ok(None);
};

let latency = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
self.update_metrics(|m| {
m.api_calls += 1;
m.last_llm_latency_ms = latency;
});

Ok(Some(result))
}

async fn handle_native_tool_calls(
&mut self,
text: Option<&str>,
tool_calls: &[zeph_llm::provider::ToolUseRequest],
) -> Result<(), super::error::AgentError> {
if let Some(t) = text
&& !t.is_empty()
{
let display = self.maybe_redact(t);
self.channel.send(&display).await?;
}

let mut parts: Vec<MessagePart> = Vec::new();
if let Some(t) = text
&& !t.is_empty()
{
parts.push(MessagePart::Text { text: t.to_owned() });
}
for tc in tool_calls {
parts.push(MessagePart::ToolUse {
id: tc.id.clone(),
name: tc.name.clone(),
input: tc.input.clone(),
});
}
let assistant_msg = Message::from_parts(Role::Assistant, parts);
self.persist_message(Role::Assistant, &assistant_msg.content)
.await;
self.messages.push(assistant_msg);

let mut result_parts: Vec<MessagePart> = Vec::new();
for tc in tool_calls {
let params: std::collections::HashMap<String, serde_json::Value> =
if let serde_json::Value::Object(map) = &tc.input {
map.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
} else {
std::collections::HashMap::new()
};

let call = ToolCall {
tool_id: tc.name.clone(),
params,
};

let (output, is_error) = match self.tool_executor.execute_tool_call(&call).await {
Ok(Some(out)) => (out.summary, false),
Ok(None) => ("(no output)".to_owned(), false),
Err(e) => (format!("[error] {e}"), true),
};

let processed = self.maybe_summarize_tool_output(&output).await;
let formatted = format_tool_output(&tc.name, &processed);
let display = self.maybe_redact(&formatted);
self.channel.send(&display).await?;

result_parts.push(MessagePart::ToolResult {
tool_use_id: tc.id.clone(),
content: processed,
is_error,
});
}

let user_msg = Message::from_parts(Role::User, result_parts);
self.persist_message(Role::User, &user_msg.content).await;
self.messages.push(user_msg);

Ok(())
}

/// Returns `true` if a doom loop was detected and the caller should break.
async fn check_doom_loop(
&mut self,
iteration: usize,
) -> Result<bool, super::error::AgentError> {
if let Some(last_msg) = self.messages.last() {
self.doom_loop_history.push(last_msg.content.clone());
if self.doom_loop_history.len() >= DOOM_LOOP_WINDOW {
let recent =
&self.doom_loop_history[self.doom_loop_history.len() - DOOM_LOOP_WINDOW..];
if recent.windows(2).all(|w| w[0] == w[1]) {
tracing::warn!(
iteration,
"doom-loop detected: {DOOM_LOOP_WINDOW} consecutive identical outputs"
);
self.channel
.send("Stopping: detected repeated identical tool outputs.")
.await?;
return Ok(true);
}
}
}
Ok(false)
}
}

fn tool_def_to_definition(def: &zeph_tools::registry::ToolDef) -> ToolDefinition {
ToolDefinition {
name: def.id.to_string(),
description: def.description.to_string(),
parameters: serde_json::to_value(&def.schema).unwrap_or_default(),
}
}