From 391cda72845acd2bec1d29366cbfbd2457ddb1ee Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Fri, 14 Mar 2025 15:43:37 +0800
Subject: [PATCH 1/7] [Bugfix] Eliminate regex based check in reasoning

Signed-off-by: Ce Gao
---
 .../test_deepseekr1_reasoning_parser.py       | 35 +++++++++++++
 .../reasoning/deepseek_r1_reasoning_parser.py | 51 ++++++++++++-------
 2 files changed, 68 insertions(+), 18 deletions(-)

diff --git a/tests/reasoning/test_deepseekr1_reasoning_parser.py b/tests/reasoning/test_deepseekr1_reasoning_parser.py
index 7b6af183a86a..5f936d73874d 100644
--- a/tests/reasoning/test_deepseekr1_reasoning_parser.py
+++ b/tests/reasoning/test_deepseekr1_reasoning_parser.py
@@ -90,6 +90,21 @@ def deepseek_r1_qwen_tokenizer():
     "content": "This is the rest",
     "is_reasoning_end": True,
 }
+THINK_NO_END = {
+    "output": "<think>This is a reasoning section",
+    "reasoning_content": "This is a reasoning section",
+    "content": None,
+}
+EMPTY = {
+    "output": "",
+    "reasoning_content": "",
+    "content": None,
+}
+EMPTY_STREAMING = {
+    "output": "",
+    "reasoning_content": None,
+    "content": None,
+}
 
 TEST_CASES = [
     pytest.param(
@@ -182,6 +197,26 @@ def deepseek_r1_qwen_tokenizer():
         SHORTEST_REASONING_WITH_THINK,
         id="shortest_with_think_streaming",
     ),
+    pytest.param(
+        False,
+        THINK_NO_END,
+        id="think_no_end",
+    ),
+    pytest.param(
+        True,
+        THINK_NO_END,
+        id="think_no_end_streaming",
+    ),
+    pytest.param(
+        False,
+        EMPTY,
+        id="empty",
+    ),
+    pytest.param(
+        True,
+        EMPTY_STREAMING,
+        id="empty_streaming",
+    ),
 ]
 
 
diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index 73be6d4d1ab1..b9a6b45c9dbc 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 
-import re
 from collections.abc import Sequence
 from typing import Optional, Union
 
@@ -32,9 +31,6 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
     def __init__(self, tokenizer: PreTrainedTokenizerBase):
         super().__init__(tokenizer)
 
-        self.reasoning_regex = re.compile(
-            rf"{self.start_token}(.*?){self.end_token}", re.DOTALL)
-
         if not self.model_tokenizer:
             raise ValueError(
                 "The model tokenizer must be passed to the ReasoningParser "
@@ -143,23 +139,42 @@ def extract_reasoning_content_streaming(
     def extract_reasoning_content(
             self, model_output: str, request: ChatCompletionRequest
     ) -> tuple[Optional[str], Optional[str]]:
+        """
+        Extract reasoning content from the model output.
+
+        For text <think>abc</think>xyz:
+        - 'abc' goes to reasoning_content
+        - 'xyz' goes to content
+
+        Returns:
+            tuple[Optional[str], Optional[str]]: reasoning content and content
+        """
+
+        # Check if the start token is present in the model output, remove it
+        # if it is present.
+        start_token_index = model_output.find(self.think_start_token)
+        if start_token_index != -1:
+            model_output = model_output[start_token_index +
+                                        len(self.think_start_token):]
+
         # DeepSeek R1 doesn't generate <think> now.
         # Thus we assume the reasoning content is always at the start.
         # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
         if self.end_token not in model_output:
             return model_output, None
         else:
-            # Add a start token if it's missing to keep compatibility.
-            if self.start_token not in model_output:
-                model_output = f"{self.start_token}{model_output}"
-            # Use a regex to find the reasoning content
-            reasoning_content = self.reasoning_regex.findall(model_output)[0]
-
-            end_index = len(
-                f"{self.start_token}{reasoning_content}{self.end_token}")
-            final_output = model_output[end_index:]
-
-            if len(final_output) == 0:
-                return reasoning_content, None
-
-            return reasoning_content, final_output
+            # Find the end token index in the model output.
+            end_token_index = model_output.find(self.think_end_token)
+            # If the end token is not found, return the model output as is.
+            # It should not happen since we already checked for the presence
+            # of the end token.
+            if end_token_index == -1:
+                return model_output, None
+            # Extract the reasoning content before the end token.
+            reasoning_content = model_output[:end_token_index]
+            # Extract the content after the end token.
+            content = model_output[end_token_index +
+                                   len(self.think_end_token):]
+            if len(content) == 0:
+                content = None
+            return reasoning_content, content

From a0612d176239ab770baa3e7ae8413b513ba0ded3 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Fri, 14 Mar 2025 22:22:30 +0800
Subject: [PATCH 2/7] fix: Update

Signed-off-by: Ce Gao
---
 vllm/reasoning/deepseek_r1_reasoning_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index b9a6b45c9dbc..c51ccf4f9e51 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -176,5 +176,5 @@ def extract_reasoning_content(
             content = model_output[end_token_index +
                                    len(self.think_end_token):]
             if len(content) == 0:
-                content = None
+                return reasoning_content, None
             return reasoning_content, content

From 5de91096b0ff2b5aee9ad1575bca3d2c720651f1 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Sun, 16 Mar 2025 10:42:32 +0800
Subject: [PATCH 3/7] Address comments

Signed-off-by: Ce Gao
---
 .../reasoning/deepseek_r1_reasoning_parser.py | 22 ++++++-------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index c51ccf4f9e51..26a9e8fc9309 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -152,10 +152,9 @@ def extract_reasoning_content(
 
         # Check if the start token is present in the model output, remove it
         # if it is present.
-        start_token_index = model_output.find(self.think_start_token)
-        if start_token_index != -1:
-            model_output = model_output[start_token_index +
-                                        len(self.think_start_token):]
+        model_output_parts = model_output.partition(self.think_start_token)
+        model_output = model_output_parts[2] if model_output_parts[
+            1] else model_output_parts[0]
 
         # DeepSeek R1 doesn't generate <think> now.
         # Thus we assume the reasoning content is always at the start.
@@ -163,18 +162,11 @@ def extract_reasoning_content(
         if self.end_token not in model_output:
             return model_output, None
         else:
-            # Find the end token index in the model output.
-            end_token_index = model_output.find(self.think_end_token)
+            reasoning_content, end, content = model_output.partition(
+                self.think_end_token)
             # If the end token is not found, return the model output as is.
             # It should not happen since we already checked for the presence
             # of the end token.
-            if end_token_index == -1:
-                return model_output, None
-            # Extract the reasoning content before the end token.
-            reasoning_content = model_output[:end_token_index]
-            # Extract the content after the end token.
-            content = model_output[end_token_index +
-                                   len(self.think_end_token):]
-            if len(content) == 0:
-                return reasoning_content, None
+            # If generation stops right after end-of-think, return null content
+            content = content or None
             return reasoning_content, content

From 8afe808046ecd559137665829c1333185a1013b0 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Sun, 16 Mar 2025 10:46:26 +0800
Subject: [PATCH 4/7] Update

Signed-off-by: Ce Gao
---
 vllm/reasoning/deepseek_r1_reasoning_parser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index 26a9e8fc9309..0a414f666f32 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -162,7 +162,7 @@ def extract_reasoning_content(
         if self.end_token not in model_output:
             return model_output, None
         else:
-            reasoning_content, end, content = model_output.partition(
+            reasoning_content, _, content = model_output.partition(
                 self.think_end_token)
             # If the end token is not found, return the model output as is.
             # It should not happen since we already checked for the presence

From 99aa88366688c768f6b05adc20bfb8a3782521d7 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Sun, 16 Mar 2025 10:54:59 +0800
Subject: [PATCH 5/7] add more test cases

Signed-off-by: Ce Gao
---
 .../test_deepseekr1_reasoning_parser.py       | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/tests/reasoning/test_deepseekr1_reasoning_parser.py b/tests/reasoning/test_deepseekr1_reasoning_parser.py
index 5f936d73874d..2f7c3f33d74c 100644
--- a/tests/reasoning/test_deepseekr1_reasoning_parser.py
+++ b/tests/reasoning/test_deepseekr1_reasoning_parser.py
@@ -105,6 +105,20 @@ def deepseek_r1_qwen_tokenizer():
     "reasoning_content": None,
     "content": None,
 }
+NEW_LINE = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "This is a reasoning section",
+    "content": "\nThis is the rest",
+}
+# Streaming cannot handle new lines at the beginning of the output
+# because we need to support <think>...</think> and ...</think>
+# We cannot know if the text before <think> is reasoning content
+# or not.
+NEW_LINE_STREAMING = {
+    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
+    "reasoning_content": "\nThis is a reasoning section",
+    "content": "\nThis is the rest",
+}
 
 TEST_CASES = [
     pytest.param(
@@ -217,6 +231,16 @@ def deepseek_r1_qwen_tokenizer():
         EMPTY_STREAMING,
         id="empty_streaming",
     ),
+    pytest.param(
+        False,
+        NEW_LINE,
+        id="new_line",
+    ),
+    pytest.param(
+        True,
+        NEW_LINE_STREAMING,
+        id="new_line_streaming",
+    ),
 ]
 
 
From ea5967a989674795ab6a7722ee1c99892d3fee22 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Sun, 16 Mar 2025 11:00:31 +0800
Subject: [PATCH 6/7] Fix pre-commit

Signed-off-by: Ce Gao
---
 vllm/reasoning/deepseek_r1_reasoning_parser.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index 0a414f666f32..c6cddd0c8eb8 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -168,5 +168,5 @@ def extract_reasoning_content(
             # It should not happen since we already checked for the presence
             # of the end token.
             # If generation stops right after end-of-think, return null content
-            content = content or None
-            return reasoning_content, content
+            final_content = content or None
+            return reasoning_content, final_content

From 3cb53201ea1cc7fa8fa836d86bbd52af1dc1ca32 Mon Sep 17 00:00:00 2001
From: Ce Gao
Date: Fri, 28 Mar 2025 16:10:32 +0800
Subject: [PATCH 7/7] rebase

Signed-off-by: Ce Gao
---
 tests/reasoning/test_deepseekr1_reasoning_parser.py | 5 +++++
 vllm/reasoning/deepseek_r1_reasoning_parser.py      | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/reasoning/test_deepseekr1_reasoning_parser.py b/tests/reasoning/test_deepseekr1_reasoning_parser.py
index 2f7c3f33d74c..1b669c8fd2fb 100644
--- a/tests/reasoning/test_deepseekr1_reasoning_parser.py
+++ b/tests/reasoning/test_deepseekr1_reasoning_parser.py
@@ -94,21 +94,25 @@ def deepseek_r1_qwen_tokenizer():
     "output": "<think>This is a reasoning section",
     "reasoning_content": "This is a reasoning section",
     "content": None,
+    "is_reasoning_end": False,
 }
 EMPTY = {
     "output": "",
     "reasoning_content": "",
     "content": None,
+    "is_reasoning_end": False,
 }
 EMPTY_STREAMING = {
     "output": "",
     "reasoning_content": None,
     "content": None,
+    "is_reasoning_end": False,
 }
 NEW_LINE = {
     "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
     "reasoning_content": "This is a reasoning section",
     "content": "\nThis is the rest",
+    "is_reasoning_end": True,
 }
 # Streaming cannot handle new lines at the beginning of the output
 # because we need to support <think>...</think> and ...</think>
@@ -118,6 +122,7 @@ def deepseek_r1_qwen_tokenizer():
     "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
     "reasoning_content": "\nThis is a reasoning section",
     "content": "\nThis is the rest",
+    "is_reasoning_end": True,
 }
 
 TEST_CASES = [
diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index c6cddd0c8eb8..1c283c092a28 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -152,7 +152,7 @@ def extract_reasoning_content(
 
         # Check if the start token is present in the model output, remove it
        # if it is present.
-        model_output_parts = model_output.partition(self.think_start_token)
+        model_output_parts = model_output.partition(self.start_token)
         model_output = model_output_parts[2] if model_output_parts[
             1] else model_output_parts[0]
 
@@ -163,7 +163,7 @@ def extract_reasoning_content(
             return model_output, None
         else:
             reasoning_content, _, content = model_output.partition(
-                self.think_end_token)
+                self.end_token)
             # If the end token is not found, return the model output as is.
             # It should not happen since we already checked for the presence
             # of the end token.
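
For reference, the behavior the series converges on can be reproduced with plain str.partition, outside vLLM. The sketch below is illustrative only: the module-level START_TOKEN/END_TOKEN constants and the free function are stand-ins for what the real parser keeps as self.start_token / self.end_token on DeepSeekR1ReasoningParser, and the asserts mirror the THINK_NO_END and EMPTY fixtures added by the tests above.

    from typing import Optional

    # Illustrative stand-ins; the real parser carries these as attributes.
    START_TOKEN = "<think>"
    END_TOKEN = "</think>"


    def extract_reasoning_content(
            model_output: str) -> tuple[Optional[str], Optional[str]]:
        # Drop everything up to and including <think> when present;
        # otherwise keep the output unchanged.
        before, sep, after = model_output.partition(START_TOKEN)
        model_output = after if sep else before

        # No </think>: the whole output is reasoning content (DeepSeek R1
        # omits the opening <think>, so reasoning starts immediately).
        if END_TOKEN not in model_output:
            return model_output, None

        reasoning_content, _, content = model_output.partition(END_TOKEN)
        # Generation that stops right at </think> yields null content.
        return reasoning_content, content or None


    assert extract_reasoning_content("<think>abc</think>xyz") == ("abc", "xyz")
    assert extract_reasoning_content("<think>abc") == ("abc", None)
    assert extract_reasoning_content("abc</think>") == ("abc", None)
    assert extract_reasoning_content("") == ("", None)

Because str.partition never raises and always returns a 3-tuple, this shape needs neither the original regex nor the explicit find()/-1 checks, which is exactly what the intermediate patches strip away.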