Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [Unreleased]

### Added
- Structured LLM output via `chat_typed<T>()` on `LlmProvider` trait with JSON schema enforcement (#456)
- OpenAI/Compatible native `response_format: json_schema` structured output (#457)
- Claude structured output via forced tool use pattern (#458)
- `Extractor<T>` utility for typed data extraction from LLM responses (#459)

## [0.10.0] - 2026-02-18

### Fixed
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/zeph-llm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ hf-hub = { workspace = true, optional = true }
ollama-rs.workspace = true
reqwest = { workspace = true, features = ["json", "rustls", "stream"] }
serde = { workspace = true, features = ["derive"] }
schemars.workspace = true
serde_json.workspace = true
tokenizers = { workspace = true, optional = true }
tokio = { workspace = true, features = ["rt", "sync", "time"] }
Expand Down
61 changes: 61 additions & 0 deletions crates/zeph-llm/src/any.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ use crate::mock::MockProvider;
use crate::ollama::OllamaProvider;
use crate::openai::OpenAiProvider;
use crate::orchestrator::ModelOrchestrator;
use schemars::JsonSchema;
use serde::de::DeserializeOwned;

use crate::provider::{ChatResponse, ChatStream, LlmProvider, Message, StatusTx, ToolDefinition};
use crate::router::RouterProvider;

Expand Down Expand Up @@ -54,6 +57,16 @@ impl AnyProvider {
}
}

/// Dispatch a typed chat request to whichever concrete provider this
/// `AnyProvider` currently wraps, deserializing the reply into `T`.
///
/// This mirrors [`LlmProvider::chat_typed`] but lives as an inherent method
/// so callers holding the type-erased enum can still get typed output.
///
/// # Errors
///
/// Returns an error if the provider fails or the response cannot be parsed.
pub async fn chat_typed_erased<T>(&self, messages: &[Message]) -> Result<T, crate::LlmError>
where
    T: DeserializeOwned + JsonSchema,
{
    // delegate_provider! matches on the enum variant and forwards the call.
    delegate_provider!(self, |p| p.chat_typed::<T>(messages).await)
}

/// Propagate a status sender to the inner provider (where supported).
pub fn set_status_tx(&mut self, tx: StatusTx) {
match self {
Expand Down Expand Up @@ -110,6 +123,10 @@ impl LlmProvider for AnyProvider {
delegate_provider!(self, |p| p.name())
}

/// Report whether the wrapped provider advertises schema-enforced
/// (structured) output; delegates to the inner provider's answer.
fn supports_structured_output(&self) -> bool {
    delegate_provider!(self, |p| p.supports_structured_output())
}

fn supports_tool_use(&self) -> bool {
delegate_provider!(self, |p| p.supports_tool_use())
}
Expand Down Expand Up @@ -416,4 +433,48 @@ mod tests {
let debug = format!("{provider:?}");
assert!(debug.contains("OpenAi"));
}

#[cfg(feature = "mock")]
#[tokio::test]
async fn chat_typed_erased_dispatches_to_mock() {
    // Minimal typed payload used to exercise schema-driven parsing.
    #[derive(Debug, serde::Deserialize, schemars::JsonSchema, PartialEq)]
    struct TestOutput {
        value: String,
    }

    // Queue exactly one canned JSON reply for the erased call to parse.
    let canned = vec![r#"{"value": "from_mock"}"#.into()];
    let provider = AnyProvider::Mock(crate::mock::MockProvider::with_responses(canned));

    let messages = vec![Message::from_legacy(Role::User, "test")];
    let parsed: TestOutput = provider.chat_typed_erased(&messages).await.unwrap();

    let expected = TestOutput {
        value: "from_mock".into(),
    };
    assert_eq!(parsed, expected);
}

#[test]
fn any_openai_supports_structured_output() {
    // Build the concrete provider first, then wrap it in the enum.
    let openai = crate::openai::OpenAiProvider::new(
        "key".into(),
        "https://api.openai.com/v1".into(),
        "gpt-4o".into(),
        1024,
        None,
        None,
    );
    let wrapped = AnyProvider::OpenAi(openai);
    assert!(wrapped.supports_structured_output());
}

#[test]
fn any_ollama_does_not_support_structured_output() {
    // Ollama is wired up without structured-output support; the enum
    // must faithfully report the inner provider's capability.
    let ollama = OllamaProvider::new("http://localhost:11434", "test".into(), "embed".into());
    let wrapped = AnyProvider::Ollama(ollama);
    assert!(!wrapped.supports_structured_output());
}
}
95 changes: 95 additions & 0 deletions crates/zeph-llm/src/claude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,84 @@ impl LlmProvider for ClaudeProvider {
"claude"
}

/// Claude always supports structured output here: `chat_typed` implements
/// it via a forced tool call whose input schema is the target type's schema.
fn supports_structured_output(&self) -> bool {
    true
}

/// Requests a structured `T` from Claude by forcing a single tool call whose
/// input schema is the JSON schema of `T`, then deserializing that tool's
/// `input` object into `T`.
///
/// # Errors
///
/// Returns an error if the schema cannot be serialized, the HTTP request
/// fails, the API responds with a non-success status, or the response
/// contains no parseable `tool_use` block.
async fn chat_typed<T>(&self, messages: &[Message]) -> Result<T, LlmError>
where
    T: serde::de::DeserializeOwned + schemars::JsonSchema,
    Self: Sized,
{
    let schema = schemars::schema_for!(T);
    let schema_value =
        serde_json::to_value(&schema).map_err(|e| LlmError::StructuredParse(e.to_string()))?;
    // Use the unqualified type name (last `::` segment) for the tool name.
    let type_name = std::any::type_name::<T>()
        .rsplit("::")
        .next()
        .unwrap_or("Output");

    let tool_name = format!("submit_{type_name}");
    let tool = ToolDefinition {
        name: tool_name.clone(),
        description: format!("Submit the structured {type_name} result"),
        parameters: schema_value,
    };

    let (system, chat_messages) = split_messages_structured(messages);
    let api_tool = AnthropicTool {
        name: &tool.name,
        description: &tool.description,
        input_schema: &tool.parameters,
    };

    let system_blocks = system.map(|s| split_system_into_blocks(&s));
    let body = TypedToolRequestBody {
        model: &self.model,
        max_tokens: self.max_tokens,
        system: system_blocks,
        messages: &chat_messages,
        tools: &[api_tool],
        // Force the model to answer through the schema tool, not free text.
        tool_choice: ToolChoice {
            r#type: "tool",
            name: &tool_name,
        },
    };

    let response = self
        .client
        .post(API_URL)
        .header("x-api-key", &self.api_key)
        .header("anthropic-version", ANTHROPIC_VERSION)
        .header("anthropic-beta", ANTHROPIC_BETA)
        .header("content-type", "application/json")
        .json(&body)
        .send()
        .await?;

    let status = response.status();
    let text = response.text().await.map_err(LlmError::Http)?;

    if !status.is_success() {
        // Surface a bounded slice of the body: API error payloads carry the
        // actual failure reason (bad schema, auth, rate limit), which the
        // status code alone does not.
        let detail: String = text.chars().take(512).collect();
        return Err(LlmError::Other(format!(
            "Claude API request failed (status {status}): {detail}"
        )));
    }

    let resp: ToolApiResponse = serde_json::from_str(&text)?;

    // The forced tool call arrives as a `tool_use` content block whose
    // `input` is the JSON object conforming to the schema of `T`.
    for block in resp.content {
        if let AnthropicContentBlock::ToolUse { input, .. } = block {
            return serde_json::from_value::<T>(input)
                .map_err(|e| LlmError::StructuredParse(e.to_string()));
        }
    }

    Err(LlmError::StructuredParse(
        "no tool_use block in response".into(),
    ))
}

fn supports_tool_use(&self) -> bool {
true
}
Expand Down Expand Up @@ -506,6 +584,23 @@ fn split_system_into_blocks(system: &str) -> Vec<SystemContentBlock> {
blocks
}

/// Request body for a Claude chat call that forces a single tool invocation,
/// so the model's answer arrives as a schema-conforming `tool_use` input
/// rather than free text.
#[derive(Serialize)]
struct TypedToolRequestBody<'a> {
    model: &'a str,
    max_tokens: u32,
    // Omitted from the serialized JSON entirely when no system prompt is set.
    #[serde(skip_serializing_if = "Option::is_none")]
    system: Option<Vec<SystemContentBlock>>,
    messages: &'a [StructuredApiMessage],
    tools: &'a [AnthropicTool<'a>],
    tool_choice: ToolChoice<'a>,
}

/// Serialized `tool_choice` object selecting a specific tool by name
/// (sent with `type: "tool"` by the structured-output request path).
#[derive(Serialize)]
struct ToolChoice<'a> {
    // `type` is a Rust keyword, hence the raw identifier.
    r#type: &'a str,
    name: &'a str,
}

#[derive(Serialize)]
struct AnthropicTool<'a> {
name: &'a str,
Expand Down
12 changes: 12 additions & 0 deletions crates/zeph-llm/src/compatible.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@ impl LlmProvider for CompatibleProvider {
self.leaked_name
}

/// Delegates structured-output capability detection to the wrapped
/// inner provider.
fn supports_structured_output(&self) -> bool {
    self.inner.supports_structured_output()
}

/// Forwards the typed chat request to the inner provider unchanged.
///
/// # Errors
///
/// Propagates any error returned by the inner provider's `chat_typed`.
async fn chat_typed<T>(&self, messages: &[Message]) -> Result<T, LlmError>
where
    T: serde::de::DeserializeOwned + schemars::JsonSchema,
    Self: Sized,
{
    self.inner.chat_typed(messages).await
}

fn supports_tool_use(&self) -> bool {
self.inner.supports_tool_use()
}
Expand Down
3 changes: 3 additions & 0 deletions crates/zeph-llm/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ pub enum LlmError {
#[error("candle error: {0}")]
Candle(#[from] candle_core::Error),

#[error("structured output parse failed: {0}")]
StructuredParse(String),

#[error("{0}")]
Other(String),
}
Expand Down
Loading
Loading