From 391cda72845acd2bec1d29366cbfbd2457ddb1ee Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Fri, 14 Mar 2025 15:43:37 +0800
Subject: [PATCH 1/7] [Bugfix] Eliminate regex based check in reasoning

Signed-off-by: Ce Gao
---
 .../test_deepseekr1_reasoning_parser.py       | 35 +++++++++++++
 .../reasoning/deepseek_r1_reasoning_parser.py | 51 ++++++++++++-------
 2 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/tests/reasoning/test_deepseekr1_reasoning_parser.py b/tests/reasoning/test_deepseekr1_reasoning_parser.py
index 7b6af183a86a..5f936d73874d 100644
--- a/tests/reasoning/test_deepseekr1_reasoning_parser.py
+++ b/tests/reasoning/test_deepseekr1_reasoning_parser.py
@@ -90,6 +90,21 @@ def deepseek_r1_qwen_tokenizer():
     "content": "This is the rest",
     "is_reasoning_end": True,
 }
+THINK_NO_END = {
+    "output": "<think>This is a reasoning section",
+    "reasoning_content": "This is a reasoning section",
+    "content": None,
+}
+EMPTY = {
+    "output": "",
+    "reasoning_content": "",
+    "content": None,
+}
+EMPTY_STREAMING = {
+    "output": "",
+    "reasoning_content": None,
+    "content": None,
+}
 
 TEST_CASES = [
     pytest.param(
@@ -182,6 +197,26 @@ def deepseek_r1_qwen_tokenizer():
         SHORTEST_REASONING_WITH_THINK,
         id="shortest_with_think_streaming",
     ),
+    pytest.param(
+        False,
+        THINK_NO_END,
+        id="think_no_end",
+    ),
+    pytest.param(
+        True,
+        THINK_NO_END,
+        id="think_no_end_streaming",
+    ),
+    pytest.param(
+        False,
+        EMPTY,
+        id="empty",
+    ),
+    pytest.param(
+        True,
+        EMPTY_STREAMING,
+        id="empty_streaming",
+    ),
 ]
 
 
diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index 73be6d4d1ab1..b9a6b45c9dbc 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 
-import re
 from collections.abc import Sequence
 from typing import Optional, Union
 
@@ -32,9 +31,6 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
     def __init__(self, tokenizer: PreTrainedTokenizerBase):
         super().__init__(tokenizer)
 
-        self.reasoning_regex = re.compile(
-            rf"{self.start_token}(.*?){self.end_token}", re.DOTALL)
-
         if not self.model_tokenizer:
             raise ValueError(
                 "The model tokenizer must be passed to the ReasoningParser "
@@ -143,23 +139,42 @@ def extract_reasoning_content_streaming(
     def extract_reasoning_content(
             self, model_output: str, request: ChatCompletionRequest
     ) -> tuple[Optional[str], Optional[str]]:
+        """
+        Extract reasoning content from the model output.
+
+        For text <think>abc</think>xyz:
+        - 'abc' goes to reasoning_content
+        - 'xyz' goes to content
+
+        Returns:
+            tuple[Optional[str], Optional[str]]: reasoning content and content
+        """
+
+        # Check if the start token is present in the model output, remove it
+        # if it is present.
+        start_token_index = model_output.find(self.think_start_token)
+        if start_token_index != -1:
+            model_output = model_output[start_token_index +
+                                        len(self.think_start_token):]
+
         # DeepSeek R1 doesn't generate <think> now.
         # Thus we assume the reasoning content is always at the start.
         # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
         if self.end_token not in model_output:
             return model_output, None
         else:
-            # Add a start token if it's missing to keep compatibility.
-            if self.start_token not in model_output:
-                model_output = f"{self.start_token}{model_output}"
-            # Use a regex to find the reasoning content
-            reasoning_content = self.reasoning_regex.findall(model_output)[0]
-
-            end_index = len(
-                f"{self.start_token}{reasoning_content}{self.end_token}")
-            final_output = model_output[end_index:]
-
-            if len(final_output) == 0:
-                return reasoning_content, None
-
-            return reasoning_content, final_output
+            # Find the end token index in the model output.
+            end_token_index = model_output.find(self.think_end_token)
+            # If the end token is not found, return the model output as is.
+            # It should not happen since we already checked for the presence
+            # of the end token.
+            if end_token_index == -1:
+                return model_output, None
+            # Extract the reasoning content before the end token.
+            reasoning_content = model_output[:end_token_index]
+            # Extract the content after the end token.
+            content = model_output[end_token_index +
+                                   len(self.think_end_token):]
+            if len(content) == 0:
+                content = None
+            return reasoning_content, content

From a0612d176239ab770baa3e7ae8413b513ba0ded3 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Fri, 14 Mar 2025 22:22:30 +0800
Subject: [PATCH 2/7] fix: Update

Signed-off-by: Ce Gao
---
 vllm/reasoning/deepseek_r1_reasoning_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index b9a6b45c9dbc..c51ccf4f9e51 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -176,5 +176,5 @@ def extract_reasoning_content(
             content = model_output[end_token_index +
                                    len(self.think_end_token):]
             if len(content) == 0:
-                content = None
+                return reasoning_content, None
             return reasoning_content, content

From 5de91096b0ff2b5aee9ad1575bca3d2c720651f1 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Sun, 16 Mar 2025 10:42:32 +0800
Subject: [PATCH 3/7] Address comments

Signed-off-by: Ce Gao
---
 .../reasoning/deepseek_r1_reasoning_parser.py | 22 ++++++-------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index c51ccf4f9e51..26a9e8fc9309 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -152,10 +152,9 @@ def extract_reasoning_content(
 
         # Check if the start token is present in the model output, remove it
         # if it is present.
-        start_token_index = model_output.find(self.think_start_token)
-        if start_token_index != -1:
-            model_output = model_output[start_token_index +
-                                        len(self.think_start_token):]
+        model_output_parts = model_output.partition(self.think_start_token)
+        model_output = model_output_parts[2] if model_output_parts[
+            1] else model_output_parts[0]
 
         # DeepSeek R1 doesn't generate <think> now.
         # Thus we assume the reasoning content is always at the start.
@@ -163,18 +162,11 @@ def extract_reasoning_content(
         if self.end_token not in model_output:
             return model_output, None
         else:
-            # Find the end token index in the model output.
-            end_token_index = model_output.find(self.think_end_token)
+            reasoning_content, end, content = model_output.partition(
+                self.think_end_token)
             # If the end token is not found, return the model output as is.
             # It should not happen since we already checked for the presence
             # of the end token.
-            if end_token_index == -1:
-                return model_output, None
-            # Extract the reasoning content before the end token.
-            reasoning_content = model_output[:end_token_index]
-            # Extract the content after the end token.
-            content = model_output[end_token_index +
-                                   len(self.think_end_token):]
-            if len(content) == 0:
-                return reasoning_content, None
+            # If generation stops right after end-of-think, return null content
+            content = content or None
             return reasoning_content, content

From 8afe808046ecd559137665829c1333185a1013b0 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Sun, 16 Mar 2025 10:46:26 +0800
Subject: [PATCH 4/7] Update

Signed-off-by: Ce Gao
---
 vllm/reasoning/deepseek_r1_reasoning_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index 26a9e8fc9309..0a414f666f32 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -162,7 +162,7 @@ def extract_reasoning_content(
         if self.end_token not in model_output:
             return model_output, None
         else:
-            reasoning_content, end, content = model_output.partition(
+            reasoning_content, _, content = model_output.partition(
                 self.think_end_token)
             # If the end token is not found, return the model output as is.
             # It should not happen since we already checked for the presence

From 99aa88366688c768f6b05adc20bfb8a3782521d7 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Sun, 16 Mar 2025 10:54:59 +0800
Subject: [PATCH 5/7] add more test cases

Signed-off-by: Ce Gao
---
 .../test_deepseekr1_reasoning_parser.py       | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/reasoning/test_deepseekr1_reasoning_parser.py b/tests/reasoning/test_deepseekr1_reasoning_parser.py
index 5f936d73874d..2f7c3f33d74c 100644
--- a/tests/reasoning/test_deepseekr1_reasoning_parser.py
+++ b/tests/reasoning/test_deepseekr1_reasoning_parser.py
@@ -105,6 +105,20 @@ def deepseek_r1_qwen_tokenizer():
     "reasoning_content": None,
     "content": None,
 }
+NEW_LINE = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "This is a reasoning section",
+    "content": "\nThis is the rest",
+}
+# Streaming cannot handle new lines at the beginning of the output
+# because we need to support <think>...</think> and ...</think>
+# We cannot know if the text before <think> is reasoning content
+# or not.
+NEW_LINE_STREAMING = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "\nThis is a reasoning section",
+    "content": "\nThis is the rest",
+}
 
 TEST_CASES = [
     pytest.param(
@@ -217,6 +231,16 @@ def deepseek_r1_qwen_tokenizer():
         EMPTY_STREAMING,
         id="empty_streaming",
     ),
+    pytest.param(
+        False,
+        NEW_LINE,
+        id="new_line",
+    ),
+    pytest.param(
+        True,
+        NEW_LINE_STREAMING,
+        id="new_line_streaming",
+    ),
 ]
 
 
From ea5967a989674795ab6a7722ee1c99892d3fee22 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Sun, 16 Mar 2025 11:00:31 +0800
Subject: [PATCH 6/7] Fix pre-commit

Signed-off-by: Ce Gao
---
 vllm/reasoning/deepseek_r1_reasoning_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index 0a414f666f32..c6cddd0c8eb8 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -168,5 +168,5 @@ def extract_reasoning_content(
             # It should not happen since we already checked for the presence
             # of the end token.
             # If generation stops right after end-of-think, return null content
-            content = content or None
-            return reasoning_content, content
+            final_content = content or None
+            return reasoning_content, final_content

From 3cb53201ea1cc7fa8fa836d86bbd52af1dc1ca32 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Fri, 28 Mar 2025 16:10:32 +0800
Subject: [PATCH 7/7] rebase

Signed-off-by: Ce Gao
---
 tests/reasoning/test_deepseekr1_reasoning_parser.py | 5 +++++
 vllm/reasoning/deepseek_r1_reasoning_parser.py      | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/reasoning/test_deepseekr1_reasoning_parser.py b/tests/reasoning/test_deepseekr1_reasoning_parser.py
index 2f7c3f33d74c..1b669c8fd2fb 100644
--- a/tests/reasoning/test_deepseekr1_reasoning_parser.py
+++ b/tests/reasoning/test_deepseekr1_reasoning_parser.py
@@ -94,21 +94,25 @@ def deepseek_r1_qwen_tokenizer():
     "output": "<think>This is a reasoning section",
     "reasoning_content": "This is a reasoning section",
     "content": None,
+    "is_reasoning_end": False,
 }
 EMPTY = {
     "output": "",
     "reasoning_content": "",
     "content": None,
+    "is_reasoning_end": False,
 }
 EMPTY_STREAMING = {
     "output": "",
     "reasoning_content": None,
     "content": None,
+    "is_reasoning_end": False,
 }
 NEW_LINE = {
     "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
     "reasoning_content": "This is a reasoning section",
     "content": "\nThis is the rest",
+    "is_reasoning_end": True,
 }
 # Streaming cannot handle new lines at the beginning of the output
 # because we need to support <think>...</think> and ...</think>
@@ -118,6 +122,7 @@ def deepseek_r1_qwen_tokenizer():
     "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
     "reasoning_content": "\nThis is a reasoning section",
     "content": "\nThis is the rest",
+    "is_reasoning_end": True,
 }
 
 TEST_CASES = [
diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index c6cddd0c8eb8..1c283c092a28 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -152,7 +152,7 @@ def extract_reasoning_content(
 
         # Check if the start token is present in the model output, remove it
        # if it is present.
-        model_output_parts = model_output.partition(self.think_start_token)
+        model_output_parts = model_output.partition(self.start_token)
         model_output = model_output_parts[2] if model_output_parts[
             1] else model_output_parts[0]
 
@@ -163,7 +163,7 @@ def extract_reasoning_content(
             return model_output, None
         else:
             reasoning_content, _, content = model_output.partition(
-                self.think_end_token)
+                self.end_token)
             # If the end token is not found, return the model output as is.
             # It should not happen since we already checked for the presence
             # of the end token.
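
For reference, the behavior the series converges on can be reproduced with plain str.partition, outside vLLM. The sketch below is illustrative only: the module-level START_TOKEN/END_TOKEN constants and the free function are stand-ins for what the real parser keeps as self.start_token / self.end_token on DeepSeekR1ReasoningParser, and the asserts mirror the THINK_NO_END and EMPTY fixtures added by the tests above.

    from typing import Optional

    # Illustrative stand-ins; the real parser carries these as attributes.
    START_TOKEN = "<think>"
    END_TOKEN = "</think>"


    def extract_reasoning_content(
            model_output: str) -> tuple[Optional[str], Optional[str]]:
        # Drop everything up to and including <think> when present;
        # otherwise keep the output unchanged.
        before, sep, after = model_output.partition(START_TOKEN)
        model_output = after if sep else before

        # No </think>: the whole output is reasoning content (DeepSeek R1
        # omits the opening <think>, so reasoning starts immediately).
        if END_TOKEN not in model_output:
            return model_output, None

        reasoning_content, _, content = model_output.partition(END_TOKEN)
        # Generation that stops right at </think> yields null content.
        return reasoning_content, content or None


    assert extract_reasoning_content("<think>abc</think>xyz") == ("abc", "xyz")
    assert extract_reasoning_content("<think>abc") == ("abc", None)
    assert extract_reasoning_content("abc</think>") == ("abc", None)
    assert extract_reasoning_content("") == ("", None)

Because str.partition never raises and always returns a 3-tuple, this shape needs neither the original regex nor the explicit find()/-1 checks, which is exactly what the intermediate patches strip away.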