Add Olmo 3 reasoning parser (vllm-project#26054)

soldni · xuebwang-amd · commit 50d9216e268b · 2025-10-24T09:19:23.000Z
Signed-off-by: Luca Soldaini &lt;luca@soldaini.net&gt;
Signed-off-by: xuebwang-amd &lt;xuebwang@amd.com&gt;
diff --git a/tests/reasoning/test_olmo3_reasoning_parser.py b/tests/reasoning/test_olmo3_reasoning_parser.py
@@ -0,0 +1,157 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+from transformers import AutoTokenizer
+
+from tests.reasoning.utils import run_reasoning_extraction
+from vllm.reasoning import ReasoningParser, ReasoningParserManager
+
+parser_name = "olmo3"
+START_REASONING = "<think>"
+END_REASONING = "</think>"
+
+NO_REASONING = {
+    "output": f"{START_REASONING}{END_REASONING}No thoughts, head empty!",
+    "reasoning_content": None,
+    "content": "No thoughts, head empty!",
+}
+
+NO_REASONING_WITH_NEWLINE = {
+    "output":
+    f"{START_REASONING}\n{END_REASONING}\n\nNo thoughts, head empty!",
+    "reasoning_content": "\n",
+    "content": "\n\nNo thoughts, head empty!",
+}
+
+SIMPLE_REASONING = {
+    "output":
+    f"{START_REASONING}This is a reasoning section{END_REASONING}This is the rest",  # noqa: E501
+    "reasoning_content": "This is a reasoning section",
+    "content": "This is the rest",
+}
+
+SIMPLE_REASONING_WITH_NEWLINE = {
+    "output":
+    f"{START_REASONING} Look!\n\nI'm thinking...{END_REASONING}\nThis is the rest",  # noqa: E501
+    "reasoning_content": " Look!\n\nI'm thinking...",
+    "content": "\nThis is the rest",
+}
+
+SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES = {
+    "output":
+    f"{START_REASONING}\nLook!\nI'm thinking...\n\n{END_REASONING}\n\n\nThis is the rest",  # noqa: E501
+    "reasoning_content": "\nLook!\nI'm thinking...\n\n",
+    "content": "\n\n\nThis is the rest",
+}
+
+NO_REASONING_ONLY_END_THINK = {
+    "output": f"{END_REASONING}\n\nNo thoughts, head empty!",
+    "reasoning_content": None,
+    "content": "\n\nNo thoughts, head empty!",
+}
+
+REASONING_ONLY_END_THINK = {
+    "output":
+    f"The user is asking me not to think.{END_REASONING}No thoughts!",
+    "reasoning_content": "The user is asking me not to think.",
+    "content": "No thoughts!",
+}
+
+TEST_CASES = [
+    pytest.param(
+        False,  # not streaming
+        NO_REASONING,
+        id="no_reasoning",
+    ),
+    pytest.param(
+        False,  # not streaming
+        NO_REASONING_WITH_NEWLINE,
+        id="no_reasoning_with_newline",
+    ),
+    pytest.param(
+        False,  # not streaming
+        SIMPLE_REASONING,
+        id="simple_reasoning",
+    ),
+    pytest.param(
+        False,  # not streaming
+        SIMPLE_REASONING_WITH_NEWLINE,
+        id="simple_reasoning_with_newline",
+    ),
+    pytest.param(
+        True,  # enable streaming
+        SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES,
+        id="simple_reasoning_with_multiple_newlines",
+    ),
+    pytest.param(
+        False,  # not streaming
+        NO_REASONING_ONLY_END_THINK,
+        id="no_reasoning_only_end_think",
+    ),
+    pytest.param(
+        False,  # not streaming
+        REASONING_ONLY_END_THINK,
+        id="yes_reasoning_only_end_think",
+    ),
+    pytest.param(
+        True,  # enable streaming
+        NO_REASONING,
+        id="no_reasoning_streaming",
+    ),
+    pytest.param(
+        True,  # enable streaming
+        NO_REASONING_WITH_NEWLINE,
+        id="no_reasoning_with_newline_streaming",
+    ),
+    pytest.param(
+        True,  # enable streaming
+        SIMPLE_REASONING,
+        id="simple_reasoning_streaming",
+    ),
+    pytest.param(
+        True,  # enable streaming
+        SIMPLE_REASONING_WITH_NEWLINE,
+        id="simple_reasoning_with_newline_streaming",
+    ),
+    pytest.param(
+        True,  # enable streaming
+        SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES,
+        id="simple_reasoning_with_multiple_newlines_streaming",
+    ),
+    pytest.param(
+        True,  # enable streaming
+        NO_REASONING_ONLY_END_THINK,
+        id="no_reasoning_only_end_think_streaming",
+    ),
+    pytest.param(
+        True,  # enable streaming
+        REASONING_ONLY_END_THINK,
+        id="yes_reasoning_only_end_think_streaming",
+    ),
+]
+
+# Global tokenizer initialization to avoid repeated loading
+tokenizer = AutoTokenizer.from_pretrained("allenai/dolma2-tokenizer")
+
+
+@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
+def test_reasoning(
+    streaming: bool,
+    param_dict: dict[str, str],
+):
+    output = tokenizer.tokenize(param_dict["output"])
+
+    # decode everything to tokens
+    model_output: list[str] = [
+        tokenizer.convert_tokens_to_string([token]) for token in output
+    ]
+    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
+    parser: ReasoningParser = parser_cls(tokenizer)
+
+    reasoning, content = run_reasoning_extraction(reasoning_parser=parser,
+                                                  model_output=model_output,
+                                                  streaming=streaming)
+
+    assert reasoning == param_dict["reasoning_content"]
+    assert content == param_dict["content"]
diff --git a/vllm/reasoning/__init__.py b/vllm/reasoning/__init__.py
@@ -9,6 +9,7 @@
 from .granite_reasoning_parser import GraniteReasoningParser
 from .hunyuan_a13b_reasoning_parser import HunyuanA13BReasoningParser
 from .mistral_reasoning_parser import MistralReasoningParser
+from .olmo3_reasoning_parser import Olmo3ReasoningParser
 from .qwen3_reasoning_parser import Qwen3ReasoningParser
 from .seedoss_reasoning_parser import SeedOSSReasoningParser
 from .step3_reasoning_parser import Step3ReasoningParser
@@ -23,6 +24,7 @@
     "Qwen3ReasoningParser",
     "Glm4MoeModelReasoningParser",
     "MistralReasoningParser",
+    "Olmo3ReasoningParser",
     "Step3ReasoningParser",
     "GptOssReasoningParser",
     "SeedOSSReasoningParser",
diff --git a/vllm/reasoning/olmo3_reasoning_parser.py b/vllm/reasoning/olmo3_reasoning_parser.py