@@ -3,12 +3,18 @@
 import pytest
 from transformers import AutoTokenizer
 
-from tests.entrypoints.openai.reasoning_parsers.utils import (
-    run_reasoning_extraction)
-from vllm.entrypoints.openai.reasoning_parsers import (ReasoningParser,
-                                                       ReasoningParserManager)
+from tests.reasoning.utils import run_reasoning_extraction
+from vllm.reasoning import ReasoningParser, ReasoningParserManager
 
 parser_name = "deepseek_r1"
 start_token = "<think>"
 end_token = "</think>"
 
+REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
+
+
+@pytest.fixture(scope="module")
+def deepseek_r1_qwen_tokenizer():
+    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
+
+
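The tokenizer moves from an import-time module global (previously `facebook/opt-125m` with manually added think tokens, removed further down) to a module-scoped fixture, so the real DeepSeek-R1 distill tokenizer is loaded lazily and exactly once for the whole module. A minimal sketch of that caching behavior, using a hypothetical `load_count` counter purely for illustration:

```python
import pytest
from transformers import AutoTokenizer

load_count = 0  # hypothetical counter, only to show the fixture caches


@pytest.fixture(scope="module")
def tokenizer():
    global load_count
    load_count += 1  # executed once per test module, not once per test
    return AutoTokenizer.from_pretrained(
        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B")


def test_first_use(tokenizer):
    assert load_count == 1


def test_reuses_cached_instance(tokenizer):
    assert load_count == 1  # no second load for the second test
```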
@@ -15,62 +21,74 @@
 SIMPLE_REASONING = {
     "output": "This is a reasoning section</think>This is the rest",
     "reasoning_content": "This is a reasoning section",
     "content": "This is the rest",
+    "is_reasoning_end": True,
 }
 COMPLETE_REASONING = {
     "output": "This is a reasoning section</think>",
     "reasoning_content": "This is a reasoning section",
     "content": None,
+    "is_reasoning_end": True,
 }
 NO_CONTENT = {
     "output": "This is content",
     "reasoning_content": "This is content",
     "content": None,
+    "is_reasoning_end": False,
 }
 NO_REASONING_STREAMING = {
     "output": "This is a reasoning section",
     "reasoning_content": "This is a reasoning section",
     "content": None,
+    "is_reasoning_end": False,
 }
 MULTIPLE_LINES = {
     "output": "This\nThat</think>This is the rest\nThat",
     "reasoning_content": "This\nThat",
     "content": "This is the rest\nThat",
+    "is_reasoning_end": True,
 }
 SHORTEST_REASONING_NO_STREAMING = {
     "output": "</think>This is the rest",
     "reasoning_content": "",
     "content": "This is the rest",
+    "is_reasoning_end": True,
 }
 SHORTEST_REASONING = {
     "output": "</think>This is the rest",
     "reasoning_content": None,
     "content": "This is the rest",
+    "is_reasoning_end": True,
 }
 REASONING_WITH_THINK = {
     "output": "<think>This is a reasoning section</think>This is the rest",
     "reasoning_content": "This is a reasoning section",
     "content": "This is the rest",
+    "is_reasoning_end": True,
 }
 COMPLETE_REASONING_WITH_THINK = {
     "output": "<think>This is a reasoning section</think>",
     "reasoning_content": "This is a reasoning section",
     "content": None,
+    "is_reasoning_end": True,
 }
 MULTIPLE_LINES_WITH_THINK = {
     "output": "<think>This\nThat</think>This is the rest\nThat",
     "reasoning_content": "This\nThat",
     "content": "This is the rest\nThat",
+    "is_reasoning_end": True,
 }
 SHORTEST_REASONING_NO_STREAMING_WITH_THINK = {
     "output": "</think>This is the rest",
     "reasoning_content": "",
     "content": "This is the rest",
+    "is_reasoning_end": True,
 }
 SHORTEST_REASONING_WITH_THINK = {
     "output": "</think>This is the rest",
     "reasoning_content": None,
     "content": "This is the rest",
+    "is_reasoning_end": True,
 }
 
 TEST_CASES = [
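Every case dict now records an `is_reasoning_end` expectation next to `reasoning_content` and `content`: `True` whenever the output contains a closing `</think>`, `False` while the model is still inside the reasoning block. The parser answers this from token ids rather than text. A minimal sketch of one plausible implementation, assuming the id of the `</think>` token is known; this is an illustration, not vLLM's actual parser code:

```python
def is_reasoning_end(token_ids: list[int], think_end_id: int) -> bool:
    # Reasoning has ended once the </think> token id has been emitted.
    return think_end_id in token_ids


def extract_content_ids(token_ids: list[int], think_end_id: int) -> list[int]:
    # Content is whatever follows the last </think>; empty if reasoning
    # never ended or </think> was the final token emitted.
    if think_end_id not in token_ids:
        return []
    last = len(token_ids) - 1 - token_ids[::-1].index(think_end_id)
    return token_ids[last + 1:]
```

(The TEST_CASES entries pairing each dict with a streaming flag are collapsed in the diff view; the hunk below resumes at the list's end.)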
@@ -166,27 +184,39 @@
     ),
 ]
 
-# Global tokenizer initialization to avoid repeated loading
-tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
-tokenizer.add_tokens([start_token, end_token])
-
 
 @pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
 def test_reasoning(
     streaming: bool,
     param_dict: dict,
+    deepseek_r1_qwen_tokenizer,
 ):
-    output = tokenizer.tokenize(param_dict["output"])
+    output = deepseek_r1_qwen_tokenizer.tokenize(param_dict["output"])
     # decode everything to tokens
     output_tokens: list[str] = [
-        tokenizer.convert_tokens_to_string([token]) for token in output
+        deepseek_r1_qwen_tokenizer.convert_tokens_to_string([token])
+        for token in output
     ]
     parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
-        parser_name)(tokenizer)
+        parser_name)(deepseek_r1_qwen_tokenizer)
 
     reasoning, content = run_reasoning_extraction(parser,
                                                   output_tokens,
                                                   streaming=streaming)
 
     assert reasoning == param_dict["reasoning_content"]
     assert content == param_dict["content"]
+
+    # Test is_reasoning_end
+    output_ids = deepseek_r1_qwen_tokenizer.convert_tokens_to_ids(output)
+    is_reasoning_end = parser.is_reasoning_end(output_ids)
+    assert is_reasoning_end == param_dict["is_reasoning_end"]
+
+    # Test extract_content
+    if param_dict["content"] is not None:
+        content = parser.extract_content_ids(output_ids)
+        assert content == deepseek_r1_qwen_tokenizer.convert_tokens_to_ids(
+            deepseek_r1_qwen_tokenizer.tokenize(param_dict["content"]))
+    else:
+        # extract_content_ids expects token ids, not token strings
+        content = parser.extract_content_ids(output_ids)
+        assert content == []
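A note on the per-token `convert_tokens_to_string` round-trip in the test body: it turns the output string into the sequence of small text deltas a streaming client would actually receive, so the streaming code path is exercised token by token rather than on one big string. A standalone sketch of the same idea, assuming a hypothetical `feed` callback (not part of the test utilities):

```python
from transformers import AutoTokenizer


def stream_as_deltas(text: str, model_name: str, feed) -> None:
    """Replay `text` as the per-token string deltas a server would emit."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    for token in tokenizer.tokenize(text):
        # Decode each token on its own so whitespace and multi-byte
        # characters arrive exactly as a streaming client would see them.
        feed(tokenizer.convert_tokens_to_string([token]))


chunks: list[str] = []
stream_as_deltas("<think>Why?</think>Because.",
                 "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
                 chunks.append)
# For a byte-level BPE tokenizer like this one, the deltas should
# reassemble the original text exactly.
assert "".join(chunks) == "<think>Why?</think>Because."
```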