Skip to content

Commit 782505e

Browse files
authored
[Model] Add reasoning_parser and tool_parser for Ernie45 thinking (#25027)
Signed-off-by: wangyafeng <wangyafeng@baidu.com>
1 parent 98f30b8 commit 782505e

File tree

7 files changed

+870
-0
lines changed

7 files changed

+870
-0
lines changed

docs/features/reasoning_outputs.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ vLLM currently supports the following reasoning models:
1111
| Model Series | Parser Name | Structured Output Support | Tool Calling |
1212
|--------------|-------------|------------------|-------------|
1313
| [DeepSeek R1 series](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d) | `deepseek_r1` | `json`, `regex` ||
14+
| [ERNIE-4.5-VL series](https://huggingface.co/baidu/ERNIE-4.5-VL-28B-A3B-PT) | `ernie45` | `json`, `regex` ||
15+
| [ERNIE-4.5-21B-A3B-Thinking](https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Thinking) | `ernie45` | `json`, `regex` ||
1416
| [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B) | `deepseek_r1` | `json`, `regex` ||
1517
| [IBM Granite 3.2 language models](https://huggingface.co/collections/ibm-granite/granite-32-language-models-67b3bc8c13508f6d064cff9a) | `granite` |||
1618
| [Qwen3 series](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) | `qwen3` | `json`, `regex` ||
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
4+
import pytest
5+
from transformers import AutoTokenizer
6+
7+
from tests.reasoning.utils import run_reasoning_extraction
8+
from vllm.reasoning import ReasoningParser, ReasoningParserManager
9+
10+
# Registry key under which the Ernie 4.5 reasoning parser is looked up.
parser_name = "ernie45"

# HF model whose tokenizer drives the token-by-token parsing tests below.
REASONING_MODEL_NAME = "baidu/ERNIE-4.5-21B-A3B-Thinking"
13+
14+
15+
@pytest.fixture(scope="module")
def ernie45_tokenizer():
    """Module-scoped tokenizer for ERNIE-4.5-Thinking (downloaded once per run)."""
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
18+
19+
20+
# Output containing </think>, non-streaming case.
WITH_THINK = {
    "output": "abc</think>def",
    "reasoning_content": "abc",
    "content": "def",
}
# Output containing </think>, streaming case.
WITH_THINK_STREAM = {
    "output": "abc</think>def",
    "reasoning_content": "abc",
    "content": "def",
}
# without </think>, all is reasoning_content
WITHOUT_THINK = {
    "output": "abc",
    "reasoning_content": "abc",
    "content": None,
}
# without </think>, all is reasoning_content
WITHOUT_THINK_STREAM = {
    "output": "abc",
    "reasoning_content": "abc",
    "content": None,
}

# Reasoning ends exactly at </think>; there is no trailing content.
COMPLETE_REASONING = {
    "output": "abc</think>",
    "reasoning_content": "abc",
    "content": None,
}
# Newlines inside both the reasoning and the content must survive the split.
MULTILINE_REASONING = {
    "output": "abc\nABC</think>def\nDEF",
    "reasoning_content": "abc\nABC",
    "content": "def\nDEF",
}
55+
56+
# (streaming, expected-split dict) pairs; each scenario is exercised in both
# streaming and non-streaming modes.
TEST_CASES = [
    pytest.param(
        False,
        WITH_THINK,
        id="with_think",
    ),
    pytest.param(
        True,
        WITH_THINK_STREAM,
        id="with_think_stream",
    ),
    pytest.param(
        False,
        WITHOUT_THINK,
        id="without_think",
    ),
    pytest.param(
        True,
        WITHOUT_THINK_STREAM,
        id="without_think_stream",
    ),
    pytest.param(
        False,
        COMPLETE_REASONING,
        id="complete_reasoning",
    ),
    pytest.param(
        True,
        COMPLETE_REASONING,
        id="complete_reasoning_stream",
    ),
    pytest.param(
        False,
        MULTILINE_REASONING,
        id="multiline_reasoning",
    ),
    pytest.param(
        True,
        MULTILINE_REASONING,
        id="multiline_reasoning_stream",
    ),
]
98+
99+
100+
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    ernie45_tokenizer,
):
    """Verify the ernie45 parser splits model output into reasoning/content.

    Runs each scenario from TEST_CASES through the registered ``ernie45``
    reasoning parser and checks that ``reasoning_content`` and ``content``
    match the expected split, in both streaming and non-streaming modes.
    """
    # Tokenize, then convert each token back to its string form so the
    # parser receives the same incremental pieces a live decode would emit.
    output = ernie45_tokenizer.tokenize(param_dict["output"])
    output_tokens: list[str] = []
    for token in output:
        one_token = ernie45_tokenizer.convert_tokens_to_string([token])
        # Skip tokens that render to an empty string (e.g. special markers).
        if one_token:
            output_tokens.append(one_token)

    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        ernie45_tokenizer
    )

    reasoning, content = run_reasoning_extraction(
        parser, output_tokens, streaming=streaming
    )

    # Fix: removed a stray leftover debug `print()` that was in the test body.
    assert reasoning == param_dict["reasoning_content"]
    assert content == param_dict["content"]

0 commit comments

Comments
 (0)