preprocessor: warn and proceed when no parser configured for tool_choice, and drop inline tests in favor of lib/llm/tests/test_jail.rs; jail: add common markers only when no parser is configured and clarify local naming; tests: tighten dual-entry path assertions

elyasmnvidian · elyasmnvidian · commit 72bd7505c8e5 · 2025-09-22T15:19:44.000-07:00
diff --git a/lib/llm/src/preprocessor.rs b/lib/llm/src/preprocessor.rs
@@ -624,15 +624,24 @@ impl OpenAIPreprocessor {
     ) -> std::result::Result<bool, Error> {
         match (tool_call_parser, tool_choice, has_tools) {
-            // No parser but tools requested - error cases
+            // No parser but tools requested - warn and proceed without jailing
-            (None, Some(ChatCompletionToolChoiceOption::Required), true) => Err(anyhow::anyhow!(
-                "Tool choice 'required' specified but no tool parser configured"
-            )),
-            (None, Some(ChatCompletionToolChoiceOption::Auto), true) => Err(anyhow::anyhow!(
-                "Tool choice 'auto' specified but no tool parser configured"
-            )),
-            (None, Some(ChatCompletionToolChoiceOption::Named(_)), _) => Err(anyhow::anyhow!(
-                "Named tool choice specified but no tool parser configured"
-            )),
+            (None, Some(ChatCompletionToolChoiceOption::Required), true) => {
+                tracing::warn!(
+                    "Tool choice 'required' specified but no tool parser configured; proceeding without jailing"
+                );
+                Ok(false)
+            }
+            (None, Some(ChatCompletionToolChoiceOption::Auto), true) => {
+                tracing::warn!(
+                    "Tool choice 'auto' specified but no tool parser configured; proceeding without jailing"
+                );
+                Ok(false)
+            }
+            (None, Some(ChatCompletionToolChoiceOption::Named(_)), _) => {
+                tracing::warn!(
+                    "Named tool choice specified but no tool parser configured; proceeding without jailing"
+                );
+                Ok(false)
+            }
 
             // Parser exists and tools might be called
             (Some(_), Some(ChatCompletionToolChoiceOption::None), _) => {
@@ -864,182 +873,4 @@ impl
     }
 }
 
-#[allow(deprecated, dead_code)]
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use dynamo_async_openai::types::{
-        ChatChoiceStream, ChatCompletionStreamResponseDelta, FinishReason as OAIFinishReason, Role,
-    };
-
-    use dynamo_runtime::protocols::annotated::Annotated;
-
-    use std::sync::Arc;
-
-    // Helper function to create a mock chat response chunk
-    fn create_mock_response_chunk(
-        content: String,
-        index: u32,
-    ) -> Annotated<NvCreateChatCompletionStreamResponse> {
-        let choice = ChatChoiceStream {
-            index,
-            delta: ChatCompletionStreamResponseDelta {
-                role: Some(Role::Assistant),
-                content: Some(content),
-                tool_calls: None,
-                function_call: None,
-                refusal: None,
-                reasoning_content: None,
-            },
-            finish_reason: None,
-            logprobs: None,
-        };
-
-        let response = NvCreateChatCompletionStreamResponse {
-            id: "test-id".to_string(),
-            choices: vec![choice],
-            created: 1234567890,
-            model: "test-model".to_string(),
-            system_fingerprint: Some("test-fingerprint".to_string()),
-            object: "chat.completion.chunk".to_string(),
-            usage: None,
-            service_tier: None,
-        };
-
-        Annotated {
-            data: Some(response),
-            id: None,
-            event: None,
-            comment: None,
-        }
-    }
-
-    // Helper function to create a final response chunk with finish reason
-    fn create_final_response_chunk(index: u32) -> Annotated<NvCreateChatCompletionStreamResponse> {
-        let choice = ChatChoiceStream {
-            index,
-            delta: ChatCompletionStreamResponseDelta {
-                role: None,
-                content: None,
-                tool_calls: None,
-                function_call: None,
-                refusal: None,
-                reasoning_content: None,
-            },
-            finish_reason: Some(OAIFinishReason::Stop),
-            logprobs: None,
-        };
-
-        let response = NvCreateChatCompletionStreamResponse {
-            id: "test-id".to_string(),
-            choices: vec![choice],
-            created: 1234567890,
-            model: "test-model".to_string(),
-            system_fingerprint: Some("test-fingerprint".to_string()),
-            object: "chat.completion.chunk".to_string(),
-            usage: None,
-            service_tier: None,
-        };
-
-        Annotated {
-            data: Some(response),
-            id: None,
-            event: None,
-            comment: None,
-        }
-    }
-
-    // Mock async engine context for testing
-    #[derive(Debug)]
-    struct MockAsyncEngineContext {
-        id: String,
-        stopped: std::sync::atomic::AtomicBool,
-    }
-
-    impl MockAsyncEngineContext {
-        fn new(id: String) -> Self {
-            Self {
-                id,
-                stopped: std::sync::atomic::AtomicBool::new(false),
-            }
-        }
-    }
-
-    #[async_trait]
-    impl dynamo_runtime::pipeline::AsyncEngineContext for MockAsyncEngineContext {
-        fn id(&self) -> &str {
-            &self.id
-        }
-
-        fn stop(&self) {
-            self.stopped
-                .store(true, std::sync::atomic::Ordering::Relaxed);
-        }
-
-        fn stop_generating(&self) {
-            self.stopped
-                .store(true, std::sync::atomic::Ordering::Relaxed);
-        }
-
-        fn kill(&self) {
-            self.stopped
-                .store(true, std::sync::atomic::Ordering::Relaxed);
-        }
-
-        fn is_stopped(&self) -> bool {
-            self.stopped.load(std::sync::atomic::Ordering::Relaxed)
-        }
-
-        fn is_killed(&self) -> bool {
-            self.stopped.load(std::sync::atomic::Ordering::Relaxed)
-        }
-
-        async fn stopped(&self) {
-            // No-op for testing
-        }
-
-        async fn killed(&self) {
-            // No-op for testing
-        }
-
-        fn link_child(&self, _: Arc<dyn dynamo_runtime::pipeline::AsyncEngineContext>) {
-            // No-op for testing
-        }
-    }
-
-    // Test for tool call detection with different parsers - still valuable to keep
-    #[tokio::test]
-    async fn test_detect_tool_call_start_different_parsers() {
-        use dynamo_parsers::tool_calling::detect_tool_call_start;
-
-        // Test nemotron_deci parser
-        assert!(detect_tool_call_start("<TOOLCALL>", Some("nemotron_deci")).unwrap());
-        assert!(!detect_tool_call_start("Hello world", Some("nemotron_deci")).unwrap());
-        assert!(!detect_tool_call_start("<tool_call>", Some("nemotron_deci")).unwrap()); // Wrong format
-
-        // Test hermes parser - now also detects JSON patterns
-        assert!(detect_tool_call_start("<tool_call>", Some("hermes")).unwrap());
-        assert!(detect_tool_call_start("{\"name\": \"test\"}", Some("hermes")).unwrap()); // JSON detection
-        assert!(!detect_tool_call_start("Hello world", Some("hermes")).unwrap());
-        assert!(!detect_tool_call_start("<TOOLCALL>", Some("hermes")).unwrap()); // Wrong format
-
-        // Test phi4 parser
-        assert!(detect_tool_call_start("functools[", Some("phi4")).unwrap());
-        assert!(detect_tool_call_start("{\"name\": \"test\"}", Some("phi4")).unwrap()); // JSON detection
-        assert!(!detect_tool_call_start("Hello world", Some("phi4")).unwrap());
-
-        // Test mistral parser
-        assert!(detect_tool_call_start("[{", Some("mistral")).unwrap());
-        assert!(detect_tool_call_start("[TOOL_CALLS]", Some("mistral")).unwrap());
-        assert!(!detect_tool_call_start("Hello world", Some("mistral")).unwrap());
-
-        // Test llama3_json parser
-        assert!(detect_tool_call_start("<|python_tag|>", Some("llama3_json")).unwrap());
-        assert!(detect_tool_call_start("{\"name\": \"test\"}", Some("llama3_json")).unwrap()); // JSON detection
-
-        // Test default parser (should behave like nemotron_deci)
-        assert!(detect_tool_call_start("<TOOLCALL>", None).unwrap());
-        assert!(detect_tool_call_start("{\"name\": \"test\"}", None).unwrap()); // JSON detection
-        assert!(!detect_tool_call_start("Hello world", None).unwrap());
-    }
-}
+// Note: tests for jailing and parser detection live in `lib/llm/tests/test_jail.rs`
diff --git a/lib/llm/src/protocols/openai/chat_completions/jail.rs b/lib/llm/src/protocols/openai/chat_completions/jail.rs
@@ -140,9 +140,9 @@ impl ChoiceJailState {
                     let full_content = format!("{}{}", marker, suffix);
 
                     // Check if this already contains the end marker
-                    let (should_unjail, split_pos) = jail_stream.should_end_jail(&full_content);
+                    let (should_end, split_pos) = jail_stream.should_end_jail(&full_content);
 
-                    if should_unjail {
+                    if should_end {
                         // Complete tool call found in this chunk
                         tracing::debug!(
                             "Choice {} complete tool call detected in single chunk",
@@ -272,9 +272,9 @@ impl ChoiceJailState {
             // Already jailed - accumulate and check for unjail
             self.accumulate(content);
 
-            let (should_unjail, split_pos) = jail_stream.should_end_jail(&self.accumulated_content);
+            let (should_end, split_pos) = jail_stream.should_end_jail(&self.accumulated_content);
 
-            if should_unjail {
+            if should_end {
                 tracing::debug!(
                     "Choice {} jail exit detected, releasing accumulated content",
                     choice.index
@@ -919,22 +919,24 @@ impl JailedStreamBuilder {
         }
 
         // Add common tool call markers to ensure we detect all formats
-        // These are always included even when a specific parser is configured
-        // to provide broad compatibility and prevent missed tool calls
-        let common_markers = vec![
-            "<TOOLCALL>".to_string(),     // nemotron_deci format
-            "<tool_call>".to_string(),    // hermes format
-            "[TOOL_CALLS]".to_string(),   // mistral format
-            "<|python_tag|>".to_string(), // llama3_json format
-            "functools[".to_string(),     // phi4 format
-            // Add JSON start patterns for Mistral-style tool calls
-            "[{".to_string(),
-            "{".to_string(),
-            // Note: Harmony parser uses JSON patterns, covered by "{" above
-        ];
-        for marker in common_markers {
-            if !all_patterns.contains(&marker) {
-                all_patterns.push(marker);
+        // Only include these when a specific parser is NOT configured,
+        // to avoid unexpected false positives for explicit formats
+        if self.tool_call_parser.is_none() {
+            let common_markers = vec![
+                "<TOOLCALL>".to_string(),     // nemotron_deci format
+                "<tool_call>".to_string(),    // hermes format
+                "[TOOL_CALLS]".to_string(),   // mistral format
+                "<|python_tag|>".to_string(), // llama3_json format
+                "functools[".to_string(),     // phi4 format
+                // Add JSON start patterns for Mistral-style tool calls
+                "[{".to_string(),
+                "{".to_string(),
+                // Note: Harmony parser uses JSON patterns, covered by "{" above
+            ];
+            for marker in common_markers {
+                if !all_patterns.contains(&marker) {
+                    all_patterns.push(marker);
+                }
             }
         }
 
diff --git a/lib/llm/tests/test_jail.rs b/lib/llm/tests/test_jail.rs
@@ -434,6 +434,11 @@ mod tests {
         let jailed_stream = jail.apply(input_stream);
         let results: Vec<_> = jailed_stream.collect().await;
 
+        // We should get 2 chunks:
+        // 1. "Normal text " (before jail)
+        // 2. Accumulated jailed content when jail ends via </jail>
+        assert_eq!(results.len(), 2);
+
         // First chunk should pass through
         assert_eq!(
             results[0].data.as_ref().unwrap().choices[0]
@@ -443,8 +448,17 @@ mod tests {
             Some("Normal text ")
         );
 
-        // Jail should trigger and accumulate
-        assert!(results.len() >= 2);
+        // Second chunk should contain the accumulated jailed content
+        let jailed = results[1]
+            .data
+            .as_ref()
+            .unwrap()
+            .choices[0]
+            .delta
+            .content
+            .as_ref()
+            .expect("Expected accumulated jailed content");
+        assert!(jailed.contains("<jail><TOOLCALL>Jailed content</jail>"));
     }
 
     #[tokio::test]