feat: python as a subprocess reasoning parser implementation

nachiketb-nvidia · nachiketb-nvidia · commit 8762d449817c · 2025-08-27T10:57:57.000-07:00
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/components/backends/vllm/src/dynamo/vllm/args.py b/components/backends/vllm/src/dynamo/vllm/args.py
@@ -117,7 +117,7 @@ def parse_args() -> Config:
         "--dyn-reasoning-parser",
         type=str,
         default=None,
-        help="Reasoning parser name for the model. Available options: 'basic', 'deepseek_r1', 'gpt_oss'.",
+        help="Reasoning parser name for the model. Available options: 'basic', 'deepseek_r1', 'gpt_oss'. This can also be a file path to a custom Python reasoning parser implementation of the `dynamo.reasoning_parser.BaseReasoningParser` interface.",
     )
 
     parser = AsyncEngineArgs.add_cli_args(parser)
diff --git a/example_python_parser.py b/example_python_parser.py
diff --git a/lib/bindings/python/examples/basic_reasoning_parser/basic_parser.py b/lib/bindings/python/examples/basic_reasoning_parser/basic_parser.py
@@ -1,8 +1,12 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
 from typing import Sequence, Tuple
 
 # import from __init__.py in the same directory
 from dynamo.reasoning_parser import BaseReasoningParser
 
+
 class BasicReasoningParser(BaseReasoningParser):
     """Base class providing two sets of interfaces: one-time and streaming incremental."""
 
@@ -17,7 +21,9 @@ def __init__(
         self._buffer = ""
         self.stripped_think_start = False
 
-    def detect_and_parse_reasoning(self, text: str, _token_ids: Sequence[int]) -> Tuple[str, str]:
+    def detect_and_parse_reasoning(
+        self, text: str, _token_ids: Sequence[int]
+    ) -> Tuple[str, str]:
         """
         One-time parsing: Detects and parses reasoning sections in the provided text.
         Returns both reasoning content and normal text separately.
@@ -41,7 +47,9 @@ def detect_and_parse_reasoning(self, text: str, _token_ids: Sequence[int]) -> Tu
 
         return (normal_text, reasoning_text)
 
-    def parse_reasoning_streaming_incremental(self, new_text: str, _token_ids: Sequence[int]) -> Tuple[str, str]:
+    def parse_reasoning_streaming_incremental(
+        self, new_text: str, _token_ids: Sequence[int]
+    ) -> Tuple[str, str]:
         """
         Streaming incremental parsing for reasoning content.
         Handles partial reasoning tags and content.
@@ -82,7 +90,6 @@ def parse_reasoning_streaming_incremental(self, new_text: str, _token_ids: Seque
         # Continue with reasoning content
         if self._in_reasoning:
             if self.stream_reasoning:
-                # Stream the content immediately
                 self._buffer = ""
                 return ("", current_text)
             else:
@@ -93,4 +100,4 @@ def parse_reasoning_streaming_incremental(self, new_text: str, _token_ids: Seque
             self._buffer = ""
             return (current_text, "")
 
-        return ("", "")
+        return ("", "")
diff --git a/lib/bindings/python/src/dynamo/reasoning_parser/__init__.py b/lib/bindings/python/src/dynamo/reasoning_parser/__init__.py
@@ -1,21 +1,23 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
 
+from typing import Protocol, Sequence, Tuple
 
-from typing import Sequence, Tuple
-from typing import Protocol
 
 class BaseReasoningParser(Protocol):
-
     def __init__(self):
         """Initialize the reasoning parser.
 
         This method should set up any necessary internal state or configurations.
 
         Signature must not change and must not take any arguments other than self.
-        
+
         """
         ...
 
-    def detect_and_parse_reasoning(self, text: str, token_ids: Sequence[int]) -> Tuple[str, str]:
+    def detect_and_parse_reasoning(
+        self, text: str, token_ids: Sequence[int]
+    ) -> Tuple[str, str]:
         """Detect and parse reasoning from the given text and token IDs.
 
         Args:
@@ -28,8 +30,10 @@ def detect_and_parse_reasoning(self, text: str, token_ids: Sequence[int]) -> Tup
             (normal_text, reasoning_text)
         """
         ...
-    
-    def parse_reasoning_streaming_incremental(self, text: str, token_ids: Sequence[int]) -> Tuple[str, str]:
+
+    def parse_reasoning_streaming_incremental(
+        self, text: str, token_ids: Sequence[int]
+    ) -> Tuple[str, str]:
         """Parse reasoning from the given text and token IDs in a streaming incremental manner.
 
         Args:
diff --git a/lib/bindings/python/src/dynamo/reasoning_parser/basic_parser.py b/lib/bindings/python/src/dynamo/reasoning_parser/basic_parser.py
@@ -0,0 +1,103 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Sequence, Tuple
+
+# import from __init__.py in the same directory
+from __init__ import BaseReasoningParser
+
+
+class BasicReasoningParser(BaseReasoningParser):
+    """Base class providing two sets of interfaces: one-time and streaming incremental."""
+
+    def __init__(
+        self,
+    ):
+        self.think_start_token = "<think>"
+        self.think_end_token = "</think>"
+        self._in_reasoning = False
+        self.stream_reasoning = True
+
+        self._buffer = ""
+        self.stripped_think_start = False
+
+    def detect_and_parse_reasoning(
+        self, text: str, _token_ids: Sequence[int]
+    ) -> Tuple[str, str]:
+        """
+        One-time parsing: Detects and parses reasoning sections in the provided text.
+        Returns both reasoning content and normal text separately.
+        """
+        in_reasoning = self._in_reasoning or self.think_start_token in text
+
+        if not in_reasoning:
+            return (text, "")
+
+        # The text is considered to be in a reasoning block.
+        processed_text = text.replace(self.think_start_token, "").strip()
+
+        if self.think_end_token not in processed_text:
+            # Assume reasoning was truncated before `</think>` token
+            return ("", processed_text)
+
+        # Extract reasoning content
+        splits = processed_text.split(self.think_end_token, maxsplit=1)
+        reasoning_text = splits[0]
+        normal_text = splits[1].strip()
+
+        return (normal_text, reasoning_text)
+
+    def parse_reasoning_streaming_incremental(
+        self, new_text: str, _token_ids: Sequence[int]
+    ) -> Tuple[str, str]:
+        """
+        Streaming incremental parsing for reasoning content.
+        Handles partial reasoning tags and content.
+
+        If stream_reasoning is False:
+            Accumulates reasoning content until the end tag is found
+        If stream_reasoning is True:
+            Streams reasoning content as it arrives
+        """
+        self._buffer += new_text
+        current_text = self._buffer
+
+        # If the buffered text is a strict prefix of the think start or end token, keep buffering
+        if any(
+            token.startswith(current_text) and token != current_text
+            for token in [self.think_start_token, self.think_end_token]
+        ):
+            return ("", "")
+
+        # Strip `<think>` token if present
+        if not self.stripped_think_start and self.think_start_token in current_text:
+            current_text = current_text.replace(self.think_start_token, "")
+            self.stripped_think_start = True
+            self._in_reasoning = True
+
+        # Handle end of reasoning block
+        if self._in_reasoning and self.think_end_token in current_text:
+            end_idx = current_text.find(self.think_end_token)
+
+            reasoning_text = current_text[:end_idx]
+
+            self._buffer = ""
+            self._in_reasoning = False
+            normal_text = current_text[end_idx + len(self.think_end_token) :]
+
+            return (normal_text, reasoning_text.rstrip())
+
+        # Continue with reasoning content
+        if self._in_reasoning:
+            if self.stream_reasoning:
+                self._buffer = ""
+                return ("", current_text)
+            else:
+                return ("", "")
+
+        # If we're not in a reasoning block return as normal text
+        if not self._in_reasoning:
+            self._buffer = ""
+            return (current_text, "")
+
+        return ("", "")
diff --git a/lib/parsers/Cargo.toml b/lib/parsers/Cargo.toml
@@ -35,3 +35,4 @@ uuid = { workspace = true }
 regex = "1"
 openai-harmony = "0.0.3"
 lazy_static = "1.5.0"
+minijinja = "2.12.0"
diff --git a/lib/parsers/src/reasoning/mod.rs b/lib/parsers/src/reasoning/mod.rs
@@ -4,11 +4,13 @@
 mod base_parser;
 mod deepseek_r1_parser;
 mod gpt_oss_parser;
+mod python_process_parser;
 
 // Re-export main types and functions for convenience
 pub use base_parser::BasicReasoningParser;
 pub use deepseek_r1_parser::DeepseekR1ReasoningParser;
 pub use gpt_oss_parser::GptOssReasoningParser;
+pub use python_process_parser::PythonProcessParser;
 
 #[derive(Debug, Clone, Default)]
 pub struct ParserResult {
@@ -116,16 +118,22 @@ impl ReasoningParserType {
         }
     }
 
-    pub fn get_reasoning_parser_from_name(name: &str) -> ReasoningParserWrapper {
-        tracing::debug!("Selected reasoning parser: {}", name);
-        match name.to_lowercase().as_str() {
+    pub fn get_reasoning_parser_from_name(name_or_path: &str) -> ReasoningParserWrapper {
+        tracing::debug!("Selected reasoning parser: {}", name_or_path);
+        // If name_or_path exists on the filesystem, treat it as a custom Python parser script
+        if std::path::Path::new(name_or_path).exists() {
+            return ReasoningParserWrapper {
+                parser: Box::new(PythonProcessParser::new(name_or_path)),
+            };
+        }
+        match name_or_path.to_lowercase().as_str() {
             "deepseek_r1" => Self::DeepseekR1.get_reasoning_parser(),
             "basic" => Self::Basic.get_reasoning_parser(),
             "gpt_oss" => Self::GptOss.get_reasoning_parser(),
             _ => {
                 tracing::warn!(
                     "Unknown reasoning parser type '{}', falling back to Basic Reasoning Parser",
-                    name
+                    name_or_path
                 );
                 Self::Basic.get_reasoning_parser()
             }
diff --git a/lib/parsers/src/reasoning/python_process_parser.rs b/lib/parsers/src/reasoning/python_process_parser.rs

Original file line number	Diff line number	Diff line change
`@@ -117,7 +117,7 @@ def parse_args() -> Config:`
`117`	`117`	`"--dyn-reasoning-parser",`
`118`	`118`	`type=str,`
`119`	`119`	`default=None,`
`120`		`- help="Reasoning parser name for the model. Available options: 'basic', 'deepseek_r1', 'gpt_oss'.",`
	`120`	+ help="Reasoning parser name for the model. Available options: 'basic', 'deepseek_r1', 'gpt_oss'. This can also be a file path to a custom Python reasoning parser implementation of the `dynamo.reasoning_parser.BaseReasoningParser` interface.",
`121`	`121`	`)`
`122`	`122`
`123`	`123`	`parser = AsyncEngineArgs.add_cli_args(parser)`