Merged

28 commits
4aba7a0  Update GPT OSS parser and related components (zhongdaor-nv, Sep 10, 2025)
de1a915  tmp (zhongdaor-nv, Sep 10, 2025)
da62e0c  feat(parsers): enhance harmony tool calling parser and add debug stat… (zhongdaor-nv, Sep 10, 2025)
471f1cb  tmp (zhongdaor-nv, Sep 10, 2025)
63d639a  remove code for debugging (zhongdaor-nv, Sep 10, 2025)
244d31c  resolve coderabit comment (zhongdaor-nv, Sep 10, 2025)
f50ed91  cargo fmt (zhongdaor-nv, Sep 10, 2025)
855fb29  coderabbit (zhongdaor-nv, Sep 10, 2025)
97f8f65  fix unit test (zhongdaor-nv, Sep 10, 2025)
bd56609  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 10, 2025)
33e7286  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 11, 2025)
c9485d7  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 15, 2025)
9a8ce48  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 15, 2025)
fbc5155  resolve comment (zhongdaor-nv, Sep 15, 2025)
031c965  Resolve merge conflicts in preprocessor.rs (zhongdaor-nv, Sep 15, 2025)
40c1d03  make ci/cd happy (zhongdaor-nv, Sep 16, 2025)
80b2ac8  Merge remote-tracking branch 'origin/main' into zhongdaor/gpt-oss-fro… (zhongdaor-nv, Sep 18, 2025)
9392353  merge to main (zhongdaor-nv, Sep 18, 2025)
489019a  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 18, 2025)
a733a8b  cargo fmt (zhongdaor-nv, Sep 18, 2025)
cdb9e7f  cargo test (zhongdaor-nv, Sep 18, 2025)
c0e22d7  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 18, 2025)
8c5d62b  add test for test_parse_tool_calls_harmony_complete_basic (zhongdaor-nv, Sep 18, 2025)
67995d7  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 18, 2025)
eb7855f  add more comment (zhongdaor-nv, Sep 18, 2025)
ca70608  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 18, 2025)
e579b3a  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 18, 2025)
fdc9f0d  Merge branch 'main' into zhongdaor/gpt-oss-frontend (zhongdaor-nv, Sep 18, 2025)
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -81,4 +81,4 @@ opt-level = 3
 [profile.release]
 # These make the build much slower but shrink the binary, and could help performance
 codegen-units = 1
-lto = true
+lto = true
9 changes: 8 additions & 1 deletion lib/llm/src/discovery/watcher.rs
@@ -263,7 +263,14 @@ impl ModelWatcher {
         let client = component.endpoint(&endpoint_id.name).client().await?;
         let model_slug = model_entry.slug();
         let card = match ModelDeploymentCard::load_from_store(&model_slug, &self.drt).await {
-            Ok(Some(card)) => card,
+            Ok(Some(mut card)) => {
+                tracing::debug!(card.display_name, "adding model");
+                // Ensure runtime_config is populated
+                if let Some(rc) = model_entry.runtime_config.clone() {
+                    card.runtime_config = rc;
+                }
+                card
+            }
             Ok(None) => {
                 anyhow::bail!("Missing ModelDeploymentCard in storage under key {model_slug}");
             }
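In effect, the discovery entry's runtime_config, when present, now overrides whatever the stored card carried. A minimal sketch of that override rule, written generically so it assumes nothing beyond Option semantics:

    // Hypothetical reduction of the rule above: the entry's config wins when
    // set; otherwise the card keeps the config it was stored with.
    fn effective_config<T>(entry: Option<T>, stored: T) -> T {
        entry.unwrap_or(stored)
    }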
8 changes: 8 additions & 0 deletions lib/llm/src/preprocessor.rs
@@ -160,6 +160,8 @@ pub struct OpenAIPreprocessor {
     formatter: Arc<dyn OAIPromptFormatter>,
     tokenizer: Arc<dyn Tokenizer>,
     model_info: Arc<dyn ModelInfo>,
+    /// Per-model runtime configuration propagated to the response generator (e.g., reasoning/tool parser)
+    runtime_config: crate::local_model::runtime_config::ModelRuntimeConfig,
     tool_call_parser: Option<String>,
 }

@@ -187,11 +189,15 @@ impl OpenAIPreprocessor {
         let model_info = model_info.get_model_info()?;
         let tool_call_parser = mdc.runtime_config.tool_call_parser.clone();

+        // Initialize runtime config from the ModelDeploymentCard
+        let runtime_config = mdc.runtime_config.clone();
+
         Ok(Arc::new(Self {
             formatter,
             tokenizer,
             model_info,
             mdcsum,
+            runtime_config,
             tool_call_parser,
         }))
     }

@@ -948,6 +954,8 @@ impl
         let response_generator = request.response_generator(context.id().to_string());
         let mut response_generator = Box::new(response_generator);

+        // Set the runtime configuration on the response generator
+        response_generator.set_reasoning_parser(self.runtime_config.clone());
         let enable_tool_calling =
             maybe_enable_tool_call(self.tool_call_parser.as_deref(), &request);
         // convert the chat completion request to a common completion request
14 changes: 14 additions & 0 deletions lib/llm/src/protocols/openai/chat_completions/delta.rs
@@ -125,6 +125,20 @@ impl DeltaGenerator {
         }
     }

+    /// Update runtime configuration and reconfigure the reasoning parser accordingly.
+    pub fn set_reasoning_parser(&mut self, runtime_config: ModelRuntimeConfig) {
+        self.options.runtime_config = runtime_config.clone();
+        match self.options.runtime_config.reasoning_parser.as_deref() {
+            Some(name) => {
+                self.reasoning_parser =
+                    Some(ReasoningParserType::get_reasoning_parser_from_name(name));
+            }
+            None => {
+                self.reasoning_parser = None;
+            }
+        }
+    }
+
     /// Updates the prompt token usage count.
     ///
     /// # Arguments
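For orientation, a minimal usage sketch of the new hook; the "gpt_oss" parser name and the Default impl on ModelRuntimeConfig are assumptions for illustration, not taken from this diff:

    // Reconfigure an existing DeltaGenerator from a per-model runtime config.
    fn reconfigure(generator: &mut DeltaGenerator) {
        let config = ModelRuntimeConfig {
            reasoning_parser: Some("gpt_oss".to_string()), // assumed parser name
            ..Default::default()                           // assumed Default impl
        };
        generator.set_reasoning_parser(config); // installs the named parser
        generator.set_reasoning_parser(ModelRuntimeConfig::default()); // clears it (None)
    }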
63 changes: 60 additions & 3 deletions lib/parsers/src/reasoning/gpt_oss_parser.rs
@@ -150,7 +150,7 @@ impl ReasoningParser for GptOssReasoningParser {

     fn parse_reasoning_streaming_incremental(
         &mut self,
-        _text: &str,
+        text: &str,
         token_ids: &[u32],
     ) -> ParserResult {
         tracing::debug!(
@@ -173,9 +173,8 @@
         }

         if let Some(channel) = self.parser.current_channel() {
-            tracing::debug!("Current channel: {}", channel);
+            tracing::debug!("Current channel {}", channel);
             if channel == "final" {
-                tracing::debug!("In final channel, processing normal text");
                 // If we're in the final channel, we should not parse reasoning
                 if let Some(current) = self.parser.last_content_delta().unwrap_or_default() {
                     tracing::debug!("Got normal text delta of {} chars", current.len());
@@ -186,6 +185,64 @@
                 }
                 tracing::debug!("No content delta in final channel");
                 ParserResult::default()
+            } else if channel == "commentary" {
+                // If we're in the commentary channel, return the raw token content and recover
+                // content already consumed by the parser, so that the tool parser can process it properly
+                if let Ok(enc) = get_harmony_encoding() {
+                    let current_content = self.parser.current_content().unwrap_or_default();
+                    let mut final_text = text.to_string();
+
+                    // Restore commentary metadata consumed by the parser so the tool-call parser can
+                    // process it correctly.
+                    //
+                    // Example:
+                    // Before parsing:
+                    //   "<|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{\"format\":\"celsius\",\"location\":\"San Francisco\"}<|call|>"
+                    // After parsing, the header is stripped, so we must reconstruct it:
+                    //   "<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>"
+                    //
+                    // This ensures downstream tool-call parsing receives the channel, target, and
+                    // constraint metadata together with the message payload.

+                    // Recovery should only happen once, and only when `current_content` is empty.
+                    if current_content.is_empty() {
+                        let tokens = self.parser.tokens();
+
+                        // Get the token id for "<|channel|>"
+                        let channel_token_id = enc
+                            .tokenizer()
+                            .encode_with_special_tokens("<|channel|>")
+                            .last()
+                            .copied();
+
+                        // Find the last occurrence of the <|channel|> token (id 20005) in the tokens vector
+                        let last_channel_token_idx = channel_token_id
+                            .and_then(|token_id| {
+                                tokens.iter().rposition(|token| *token == token_id)
+                            })
+                            .unwrap_or(0);
+
+                        // Then get the generated text from the last <|channel|> to the end of self.parser.tokens()
+                        let end_token_idx = self.parser.tokens().len();
+                        // Use Harmony's decode_utf8 to decode tokens into text
+                        let generated_text = enc
+                            .tokenizer()
+                            .decode_utf8(
+                                &self.parser.tokens()[last_channel_token_idx..end_token_idx],
+                            )
+                            .unwrap_or_default();
+
+                        final_text = generated_text;
+                    }
+
+                    ParserResult {
+                        normal_text: final_text,
+                        reasoning_text: String::new(),
+                    }
+                } else {
+                    tracing::warn!("Failed to get harmony encoding for raw token decoding");
+                    ParserResult::default()
+                }
             } else {
                 tracing::debug!("In reasoning channel: {}", channel);
                 if let Some(current) = self.parser.last_content_delta().unwrap_or_default() {
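The recovery step is easier to see in isolation. A standalone sketch of the same idea, generic over the decoder so it assumes no openai_harmony API beyond what the diff already uses:

    // Find the last "<|channel|>" token and decode from there to the end of the
    // buffer, reconstructing the header the parser consumed. Falls back to the
    // whole buffer if the token is absent, and to "" if decoding fails.
    fn recover_commentary_header<F>(tokens: &[u32], channel_token_id: u32, decode: F) -> String
    where
        F: Fn(&[u32]) -> Option<String>,
    {
        let start = tokens
            .iter()
            .rposition(|t| *t == channel_token_id)
            .unwrap_or(0);
        decode(&tokens[start..]).unwrap_or_default()
    }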
122 changes: 120 additions & 2 deletions lib/parsers/src/tool_calling/harmony/harmony_parser.rs
@@ -3,9 +3,10 @@

 use super::config::JsonParserConfig;
 use super::response::{CalledFunction, ToolCallResponse, ToolCallType};
-use openai_harmony::StreamableParser;
 use openai_harmony::chat::{Content::Text, Role};
-use openai_harmony::{HarmonyEncoding, HarmonyEncodingName, load_harmony_encoding};
+use openai_harmony::{
+    HarmonyEncoding, HarmonyEncodingName, StreamableParser, load_harmony_encoding,
+};
 use serde_json::Value;

 static GLOBAL_HARMONY_GPTOSS_ENCODING: tokio::sync::OnceCell<
@@ -162,6 +163,109 @@ pub async fn parse_tool_calls_harmony(
     Ok((res, Some(normal_text.to_string())))
 }

+/// Parse tool calls from a complete Harmony Format text chunk using direct token parsing.
+///
+/// This function is optimized for parsing complete text chunks where the entire content
+/// is available at once. It uses `parse_messages_from_completion_tokens` to directly
+/// parse all tokens into Harmony Format messages, then extracts tool calls from messages
+/// with the "commentary" channel and "functions.*" recipients.
+///
+/// Unlike `parse_tool_calls_harmony`, this function doesn't perform start-token detection
+/// or token-by-token streaming, which makes it more efficient for complete chunks.
+///
+/// # Arguments
+/// * `text` - The full Harmony-format string to be parsed, excluding any trailing stop tokens.
+///   Example:
+///   `<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{"location":"San Francisco"}`
+/// * `_config` - Parser configuration (currently unused but kept for API consistency)
+///
+/// # Returns
+/// * `Ok((tool_calls, normal_text))` - Tuple containing extracted tool calls and any normal text
+/// * `Err(e)` - If parsing fails due to encoding or tokenization errors
+pub async fn parse_tool_calls_harmony_complete(
+    text: &str,
+    _config: &JsonParserConfig,
+) -> anyhow::Result<(Vec<ToolCallResponse>, Option<String>)> {
+    let enc = match get_harmony_encoding().await.as_ref() {
+        Ok(e) => e,
+        Err(e) => {
+            tracing::debug!("Failed to load harmony encoding: {e}. Tool calls will not be parsed.");
+            return Ok((vec![], Some(text.to_string())));
+        }
+    };
+
+    // Encode the text into tokens using the harmony encoding
+    let tokens: Vec<u32> = enc.tokenizer().encode_with_special_tokens(text);
+    let messages = match enc.parse_messages_from_completion_tokens(tokens, Some(Role::Assistant)) {
+        Ok(messages) => messages,
+        Err(e) => {
+            tracing::debug!(
+                "Failed to parse messages from completion tokens: {e}. Tool calls will not be parsed."
+            );
+            return Ok((vec![], Some(text.to_string())));
+        }
+    };
+
+    let mut normal_text = String::new();
+
+    let mut res = Vec::with_capacity(messages.len());
+    let mut call_idx = 0; // Index of the tool call
+
+    for message in messages.iter() {
+        if message.author.role != Role::Assistant {
+            continue;
+        }
+
+        let channel = message.channel.as_deref();
+        let recipient = message.recipient.as_deref().unwrap_or_default();
+
+        // Handle the commentary channel
+        if channel == Some("commentary") && recipient.starts_with("functions.") {
+            let Some(fname) = message
+                .recipient
+                .as_ref()
+                .and_then(|r| r.split('.').nth(1))
+                .filter(|s| !s.is_empty())
+                .map(|s| s.to_string())
+            else {
+                continue;
+            };
+
+            let args = match message.content.first() {
+                Some(Text(text)) => match serde_json::from_str::<Value>(text.text.trim()) {
+                    Ok(value) => value,
+                    Err(_) => {
+                        Value::Null // Set args to null if it's not valid JSON
+                    }
+                },
+                _ => {
+                    Value::Null // Set args to null if it's not text content
+                }
+            };
+            // Add the tool call to the result only if args is valid JSON
+            if !args.is_null() {
+                call_idx += 1;
+                res.push(ToolCallResponse {
+                    id: format!("call-{}", call_idx),
+                    tp: ToolCallType::Function,
+                    function: CalledFunction {
+                        name: fname,
+                        // Safety: serializing a `serde_json::Value` cannot fail
+                        arguments: serde_json::to_string(&args).unwrap(),
+                    },
+                });
+            }
+        // Handle the reasoning (analysis) channel
+        } else if channel == Some("analysis") {
+            normal_text.push_str(match &message.content[0] {
+                Text(t) => &t.text,
+                _ => "",
+            });
+        }
+    }
+    Ok((res, Some(normal_text.to_string())))
+}
+
 pub fn detect_tool_call_start_harmony(
     chunk: &str,
     config: &JsonParserConfig,
@@ -266,6 +370,20 @@
         assert_eq!(args["location"], "San Francisco");
     }

+    #[tokio::test]
+    async fn test_parse_tool_calls_harmony_complete_basic() {
+        let text = r#"<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{"format":"celsius","location":"San Francisco"}"#;
+        let (tool_calls, normal_content) =
+            parse_tool_calls_harmony_complete(text, &Default::default())
+                .await
+                .unwrap();
+        assert_eq!(normal_content, Some("".to_string()));
+        let (name, args) = extract_name_and_args(tool_calls[0].clone());
+        assert_eq!(name, "get_current_weather");
+        assert_eq!(args["location"], "San Francisco");
+        assert_eq!(args["format"], "celsius");
+    }
+
     #[tokio::test]
     async fn test_parse_tools_harmony_without_start_token() {
         let text = r#"
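Callers outside the test module would reach the new entry point through the re-exports added below; a minimal sketch, assuming an async context and the default JsonParserConfig:

    // Hypothetical caller, mirroring test_parse_tool_calls_harmony_complete_basic.
    async fn demo() -> anyhow::Result<()> {
        let text = r#"<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{"location":"Paris"}"#;
        let (tool_calls, normal_text) =
            parse_tool_calls_harmony_complete(text, &JsonParserConfig::default()).await?;
        assert_eq!(tool_calls[0].function.name, "get_current_weather");
        assert_eq!(normal_text.as_deref(), Some("")); // analysis-channel text lands here
        Ok(())
    }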
4 changes: 3 additions & 1 deletion lib/parsers/src/tool_calling/harmony/mod.rs
@@ -4,4 +4,6 @@
 pub mod harmony_parser;

 pub use super::{config, response};
-pub use harmony_parser::{detect_tool_call_start_harmony, parse_tool_calls_harmony};
+pub use harmony_parser::{
+    detect_tool_call_start_harmony, parse_tool_calls_harmony, parse_tool_calls_harmony_complete,
+};
2 changes: 1 addition & 1 deletion lib/parsers/src/tool_calling/mod.rs
@@ -11,7 +11,7 @@ pub mod tools;

 // Re-export main types and functions for convenience
 pub use config::{JsonParserConfig, ToolCallConfig, ToolCallParserType};
-pub use harmony::parse_tool_calls_harmony;
+pub use harmony::{parse_tool_calls_harmony, parse_tool_calls_harmony_complete};
 pub use json::try_tool_call_parse_json;
 pub use parsers::{detect_and_parse_tool_call, try_tool_call_parse};
 pub use pythonic::try_tool_call_parse_pythonic;
10 changes: 4 additions & 6 deletions lib/parsers/src/tool_calling/parsers.rs
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0

 use super::config::{ToolCallConfig, ToolCallParserType};
-use super::harmony::{detect_tool_call_start_harmony, parse_tool_calls_harmony};
+use super::harmony::{detect_tool_call_start_harmony, parse_tool_calls_harmony_complete};
 use super::json::{detect_tool_call_start_json, try_tool_call_parse_json};
 use super::pythonic::{detect_tool_call_start_pythonic, try_tool_call_parse_pythonic};
 use super::response::ToolCallResponse;
@@ -43,7 +43,8 @@ pub async fn try_tool_call_parse(
             Ok((results, normal_content))
         }
         ToolCallParserType::Harmony => {
-            let (results, normal_content) = parse_tool_calls_harmony(message, &config.json).await?;
+            let (results, normal_content) =
+                parse_tool_calls_harmony_complete(message, &config.json).await?;
             Ok((results, normal_content))
         }
         ToolCallParserType::Pythonic => {
@@ -1450,10 +1451,7 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
     #[tokio::test]
     async fn test_harmony_parser_basic() {
         let input = r#"
-<|channel|>analysis<|message|>Need to use function get_current_weather.<|end|>
-<|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json
-<|message|>{"location":"San Francisco", "unit":"fahrenheit"}<|call|>
-"#;
+<|channel|>analysis<|message|>Need to use function get_current_weather.<|end|><|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{"location":"San Francisco", "unit":"fahrenheit"}"#;
         let (result, content) = detect_and_parse_tool_call(input, Some("harmony"))
             .await
             .unwrap();