Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
f81e3a9
feat: always stream for tool calling
elyasmnvidian Sep 8, 2025
03a0c56
chore: moved tool parsing to preprocessor.rs
elyasmnvidian Sep 8, 2025
218f6c5
chore: added unit tests
elyasmnvidian Sep 8, 2025
ea6fd60
chore: rebase and updated function calls
elyasmnvidian Sep 9, 2025
de7439a
fix: curl doesn't break now
ayushag-nv Sep 9, 2025
2a0b72d
chore: enabled stream=true for tool choice
elyasmnvidian Sep 9, 2025
3e00b12
chore: fix aggregator - clippy to be fixed
ayushag-nv Sep 10, 2025
cc713c6
fix: fixed rebased artifacts
ayushag-nv Sep 10, 2025
07b0572
fix: fixed tests and rebase artifacts
ayushag-nv Sep 10, 2025
f3b05d9
fix: dyn namespace scoping for trtllm (#2970)
biswapanda Sep 10, 2025
dd99c53
Merge branch 'main' into elyas/streamtool
ayushag-nv Sep 10, 2025
e9fa34c
Merge branch 'main' into elyas/streamtool
ayushag-nv Sep 15, 2025
8db8fea
Merge branch 'main' into elyas/streamtool
ayushag-nv Sep 15, 2025
290859a
feat: add standalone JailedStream implementation for token jail detec…
ryanolson Sep 15, 2025
1c83f8f
refactor: optimize JailedStream for better performance
ryanolson Sep 15, 2025
f0ed9f8
perf: optimize JailedStream to use impl Stream and remove context ove…
ryanolson Sep 15, 2025
4b83cca
refactor: update preprocessor to use new JailedStream implementation
ryanolson Sep 15, 2025
9a830aa
feat: add dual entry/exit paths for JailedStream
ryanolson Sep 15, 2025
829144e
refactor: optimize stream transformations to reduce boxing
ryanolson Sep 15, 2025
78a55a7
feat: add conditional tool jail application based on tool_choice
ryanolson Sep 16, 2025
8970fb3
chore: clean up commented-out tests and add gitignore changes
ryanolson Sep 16, 2025
1521bf9
Merge branch 'main' into ryan/streamtool
ryanolson Sep 16, 2025
ae23845
fix: resolve clippy warnings and test compilation errors
ryanolson Sep 16, 2025
e156458
test: add comprehensive jail functionality test coverage
ryanolson Sep 16, 2025
e027e61
fix: preserve Annotated metadata through jail processing
ryanolson Sep 16, 2025
3d552d3
fix: preserve trailing content after jail end markers
ryanolson Sep 16, 2025
3428610
feat: refactor jail state to support independent multi-choice processing
ryanolson Sep 16, 2025
c3cf97d
feat: implement independent multi-choice jailing architecture
ryanolson Sep 17, 2025
a3e4512
fix: separate trailing content emission for independent choice jailing
ryanolson Sep 17, 2025
5b9a665
feat: implement partial marker matching for streaming tool calls
ryanolson Sep 17, 2025
7f6c62c
refactor: standardize jail tests with human-readable assertions
ryanolson Sep 17, 2025
d40ad65
refactor: standardize jail test assertions for improved readability
ryanolson Sep 17, 2025
3128bf2
test: add comprehensive edge case tests for prefix matcher
ryanolson Sep 17, 2025
5158aea
fix: implement UTF-8 safe slicing in prefix matcher
ryanolson Sep 18, 2025
c10e14a
refactor: remove unnecessary async from tool parsing chain
ryanolson Sep 18, 2025
081126b
fix: address PR review comments for JailedStream
ryanolson Sep 19, 2025
dcc3769
chore: move tests to test_jail.rs
ayushag-nv Sep 22, 2025
728259e
chore: update cargo lock
elyasmnvidian Sep 22, 2025
a32adf4
Merge branch 'main' into ryan/streamtool
elyasmnvidian Sep 22, 2025
15b4aca
fix: bugs
ayushag-nv Sep 22, 2025
265870c
chore: fix unit test #1
elyasmnvidian Sep 22, 2025
6b26d80
fix: more bugs
ayushag-nv Sep 22, 2025
3b7c8ad
Merge branch 'main' into ryan/streamtool
ayushag-nv Sep 22, 2025
2266c81
fix: clippy
ayushag-nv Sep 22, 2025
8acf29f
chore: fix await unit test for harmony
elyasmnvidian Sep 22, 2025
d87a910
fix: harmony
ayushag-nv Sep 22, 2025
2b4c77e
Merge branch 'main' into ryan/streamtool
ayushag-nv Sep 22, 2025
72bd750
preprocessor: warn and proceed when no parser configured for tool_cho…
elyasmnvidian Sep 22, 2025
f58c24a
Merge branch 'main' into ryan/streamtool
ayushag-nv Sep 22, 2025
824bb3d
fix: cargo fmt
ayushag-nv Sep 22, 2025
e04a1cd
fix: fmt parsers
ayushag-nv Sep 22, 2025
53e6ada
fix: ci bugs
ayushag-nv Sep 23, 2025
29dff4b
Merge branch 'main' into ryan/streamtool
ayushag-nv Sep 23, 2025
8cfe06f
chore: reduced lines
ayushag-nv Sep 22, 2025
583ff5b
fix: some more reduced lines
ayushag-nv Sep 23, 2025
41c7ef8
chore: move tool end detection to parser lib
ayushag-nv Sep 23, 2025
4ff495d
chore: emit methods
ayushag-nv Sep 23, 2025
b804181
Merge branch 'main' into ayushag/jailstream-opt
ayushag-nv Sep 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
350 changes: 135 additions & 215 deletions lib/llm/src/protocols/openai/chat_completions/jail.rs

Large diffs are not rendered by default.

56 changes: 23 additions & 33 deletions lib/parsers/src/tool_calling/harmony/harmony_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,19 @@ use openai_harmony::{
HarmonyEncoding, HarmonyEncodingName, StreamableParser, load_harmony_encoding,
};
use serde_json::Value;
use std::sync::OnceLock;

static GLOBAL_HARMONY_GPTOSS_ENCODING: tokio::sync::OnceCell<
Result<HarmonyEncoding, anyhow::Error>,
> = tokio::sync::OnceCell::const_new();
static GLOBAL_HARMONY_GPTOSS_ENCODING: OnceLock<Result<HarmonyEncoding, anyhow::Error>> =
OnceLock::new();

pub async fn get_harmony_encoding() -> &'static Result<HarmonyEncoding, anyhow::Error> {
pub fn get_harmony_encoding() -> &'static Result<HarmonyEncoding, anyhow::Error> {
GLOBAL_HARMONY_GPTOSS_ENCODING
.get_or_init(|| async {
tokio::task::spawn_blocking(|| {
load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss)
})
.await
.map_err(anyhow::Error::msg)
.flatten()
})
.await
.get_or_init(|| load_harmony_encoding(HarmonyEncodingName::HarmonyGptOss))
}

/// Parse tool calls from Harmony Format text
/// <|channel|>analysis<|message|>Need to use function get_current_weather.<|end|><|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{"location":"San Francisco"}<|call|>
pub async fn parse_tool_calls_harmony(
pub fn parse_tool_calls_harmony(
text: &str,
config: &JsonParserConfig,
) -> anyhow::Result<(Vec<ToolCallResponse>, Option<String>)> {
Expand All @@ -52,7 +44,7 @@ pub async fn parse_tool_calls_harmony(
trimmed.push_str(end_token);
}

let enc = match get_harmony_encoding().await.as_ref() {
let enc = match get_harmony_encoding().as_ref() {
Ok(e) => e,
Err(e) => {
tracing::debug!("Failed to load harmony encoding: {e}. Tool calls will not be parsed.");
Expand Down Expand Up @@ -181,11 +173,11 @@ pub async fn parse_tool_calls_harmony(
/// # Returns
/// * `Ok((tool_calls, normal_text))` - Tuple containing extracted tool calls and any normal text
/// * `Err(e)` - If parsing fails due to encoding or tokenization errors
pub async fn parse_tool_calls_harmony_complete(
pub fn parse_tool_calls_harmony_complete(
text: &str,
_config: &JsonParserConfig,
) -> anyhow::Result<(Vec<ToolCallResponse>, Option<String>)> {
let enc = match get_harmony_encoding().await.as_ref() {
let enc = match get_harmony_encoding().as_ref() {
Ok(e) => e,
Err(e) => {
tracing::debug!("Failed to load harmony encoding: {e}. Tool calls will not be parsed.");
Expand Down Expand Up @@ -346,8 +338,8 @@ mod tests {
(call.function.name, args)
}

#[tokio::test]
async fn test_parse_tool_calls_harmony_basic() {
#[test]
fn test_parse_tool_calls_harmony_basic() {
let text = r#"
<|channel|>analysis<|message|>Need to use function get_current_weather.<|end|>
<|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json
Expand All @@ -358,7 +350,7 @@ mod tests {
tool_call_end_tokens: vec!["<|call|>".to_string()],
..Default::default()
};
let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).await.unwrap();
let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).unwrap();
assert_eq!(
normal_content,
Some("Need to use function get_current_weather.".to_string())
Expand All @@ -373,9 +365,7 @@ mod tests {
async fn test_parse_tool_calls_harmony_complete_basic() {
let text = r#"<|channel|>commentary to=functions.get_current_weather <|constrain|>json<|message|>{"format":"celsius","location":"San Francisco"}"#;
let (tool_calls, normal_content) =
parse_tool_calls_harmony_complete(text, &Default::default())
.await
.unwrap();
parse_tool_calls_harmony_complete(text, &Default::default()).unwrap();
assert_eq!(normal_content, Some("".to_string()));
let (name, args) = extract_name_and_args(tool_calls[0].clone());
assert_eq!(name, "get_current_weather");
Expand All @@ -394,13 +384,13 @@ mod tests {
tool_call_end_tokens: vec!["<|call|>".to_string()],
..Default::default()
};
let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).await.unwrap();
let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).unwrap();
assert_eq!(normal_content, Some(text.trim().to_string()));
assert_eq!(tool_calls.len(), 0);
}

#[tokio::test]
async fn test_parse_tool_calls_harmony_with_multi_args() {
#[test]
fn test_parse_tool_calls_harmony_with_multi_args() {
let text = r#"
<|channel|>analysis<|message|>Need to use function get_current_weather.<|end|>
<|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json
Expand All @@ -411,7 +401,7 @@ mod tests {
tool_call_end_tokens: vec!["<|call|>".to_string()],
..Default::default()
};
let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).await.unwrap();
let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).unwrap();
assert_eq!(
normal_content,
Some("Need to use function get_current_weather.".to_string())
Expand All @@ -423,8 +413,8 @@ mod tests {
assert_eq!(args["unit"], "fahrenheit");
}

#[tokio::test]
async fn test_parse_tool_calls_harmony_with_normal_text() {
#[test]
fn test_parse_tool_calls_harmony_with_normal_text() {
let text = r#"
<|channel|>analysis<|message|>Need to use function get_current_weather.<|end|>
<|start|>assistant<|channel|>commentary to=functions.get_current_weather <|constrain|>json
Expand All @@ -435,7 +425,7 @@ mod tests {
tool_call_end_tokens: vec!["<|call|>".to_string()],
..Default::default()
};
let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).await.unwrap();
let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).unwrap();
assert_eq!(
normal_content,
Some("Need to use function get_current_weather.".to_string())
Expand All @@ -446,15 +436,15 @@ mod tests {
assert_eq!(args["location"], "San Francisco");
}

#[tokio::test]
async fn test_parse_tool_calls_harmony_without_call_token() {
#[test]
fn test_parse_tool_calls_harmony_without_call_token() {
let text = r#"<|channel|>analysis<|message|>We need to call get_weather function. The user asks "What's the weather like in San Francisco in Celsius?" So location: "San Francisco, CA" unit: "celsius". Let's call function.<|end|><|start|>assistant<|channel|>commentary to=functions.get_weather <|constrain|>json<|message|>{"location":"San Francisco, CA","unit":"celsius"}"#;
let config = JsonParserConfig {
tool_call_start_tokens: vec!["<|start|>assistant<|channel|>commentary".to_string()],
tool_call_end_tokens: vec!["<|call|>".to_string()],
..Default::default()
};
let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).await.unwrap();
let (tool_calls, normal_content) = parse_tool_calls_harmony(text, &config).unwrap();
assert_eq!(normal_content, Some("We need to call get_weather function. The user asks \"What's the weather like in San Francisco in Celsius?\" So location: \"San Francisco, CA\" unit: \"celsius\". Let's call function.".to_string()));
assert_eq!(tool_calls.len(), 1);
let (name, args) = extract_name_and_args(tool_calls[0].clone());
Expand Down
4 changes: 4 additions & 0 deletions lib/parsers/src/tool_calling/harmony/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@ pub use super::{config, response};
pub use harmony_parser::{
detect_tool_call_start_harmony, parse_tool_calls_harmony, parse_tool_calls_harmony_complete,
};

/// Returns the byte offset at which a Harmony-format tool call ends in `chunk`.
///
/// No end marker is searched for here: the whole chunk is reported as
/// belonging to the tool call, so the result is always the byte length of
/// `chunk` (and `0` for an empty chunk).
pub fn find_tool_call_end_position_harmony(chunk: &str) -> usize {
    str::len(chunk)
}
28 changes: 28 additions & 0 deletions lib/parsers/src/tool_calling/json/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,31 @@ pub fn detect_tool_call_start_json(chunk: &str, config: &JsonParserConfig) -> bo
JsonParserType::DeepseekV31 => detect_tool_call_start_deepseek_v3_1(chunk, config),
}
}

/// Returns the byte offset just past the end of a JSON-style tool call in `chunk`.
///
/// The end of the tool call is located according to `parser`:
///
/// * `"hermes"` / `"nemotron_deci"` - the first entry of
///   `config.tool_call_end_tokens` is searched for; the result is the offset
///   immediately after its first occurrence.
/// * `"mistral"` / `"phi4"` - the result is the offset immediately after the
///   last `]` in the chunk.
/// * any other parser name - no marker is searched for.
///
/// Whenever no end marker can be located (unknown parser, no configured end
/// token, or the marker is absent from `chunk`), the whole chunk is treated as
/// the tool call and `chunk.len()` is returned.
///
/// An empty end token is treated the same as a missing one: an empty pattern
/// matches at offset 0, which would otherwise report that the tool call ends
/// before any of its content.
pub fn find_tool_call_end_position_json(
    chunk: &str,
    parser: &str,
    config: &JsonParserConfig,
) -> usize {
    let end_position = match parser {
        "hermes" | "nemotron_deci" => config
            .tool_call_end_tokens
            .first()
            // Skip an empty token so it cannot "match" at offset 0.
            .filter(|token| !token.is_empty())
            .and_then(|token| chunk.find(token.as_str()).map(|pos| pos + token.len())),
        // `]` is a single-byte character, so `pos + 1` stays on a char boundary.
        "mistral" | "phi4" => chunk.rfind(']').map(|pos| pos + 1),
        _ => None,
    };

    end_position.unwrap_or(chunk.len())
}
5 changes: 4 additions & 1 deletion lib/parsers/src/tool_calling/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@ pub mod tools;
pub use config::{JsonParserConfig, ToolCallConfig, ToolCallParserType};
pub use harmony::{parse_tool_calls_harmony, parse_tool_calls_harmony_complete};
pub use json::try_tool_call_parse_json;
pub use parsers::{detect_and_parse_tool_call, detect_tool_call_start, try_tool_call_parse};
pub use parsers::{
detect_and_parse_tool_call, detect_tool_call_start, find_tool_call_end_position,
try_tool_call_parse,
};
pub use pythonic::try_tool_call_parse_pythonic;
pub use response::{CalledFunction, ToolCallResponse, ToolCallType};
pub use tools::{try_tool_call_parse_aggregate, try_tool_call_parse_stream};
Loading
Loading