612 changes: 473 additions & 139 deletions tests/entrypoints/openai/test_gptoss_structural_tags_integration.py

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions vllm/entrypoints/harmony_utils.py
@@ -502,6 +502,35 @@ def get_stop_tokens_for_assistant_actions() -> list[int]:
return get_encoding().stop_tokens_for_assistant_actions()


def get_tool_names_from_messages(messages: list[Message]) -> set[str]:
"""
Returns a set of tool names for the purpose of guided decoding
"""
tool_names: set[str] = set()
for message in messages:
if message.author.role == Role.SYSTEM or message.author.role == Role.DEVELOPER:
assert len(message.content) == 1, (
f"SYSTEM/DEVELOPER messages should have exactly 1 content item, "
f"got {len(message.content)}"
)
message_content = message.content[0]
assert isinstance(message_content, (SystemContent, DeveloperContent)), (
f"SYSTEM/DEVELOPER message content should be SystemContent or "
f"DeveloperContent, got {type(message_content).__name__}"
)
tool_namespace_configs = (
message_content.tools.values() if message_content.tools else []
)
for tool_namespace_config in tool_namespace_configs:
# gpt-oss special case: the python tool does not need a namespace
if tool_namespace_config.name == "python":
tool_names.add("python")
continue
for tool in tool_namespace_config.tools:
tool_names.add(f"{tool_namespace_config.name}.{tool.name}")
return tool_names


def get_streamable_parser_for_assistant() -> StreamableParser:
return StreamableParser(get_encoding(), role=Role.ASSISTANT)

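For orientation, a hedged illustration of the naming convention produced by `get_tool_names_from_messages`; the concrete tool names below are hypothetical and not taken from this PR:

```python
# Hypothetical output of get_tool_names_from_messages for a conversation whose
# system/developer messages declare the python and browser built-ins plus one
# user-defined function tool (the specific names are made up).
expected_tool_names = {
    "python",                 # built-in python tool: no namespace prefix
    "browser.search",         # namespaced built-in tool
    "functions.get_weather",  # user-defined function in the "functions" namespace
}
```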
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/api_server.py
@@ -1776,11 +1776,11 @@ async def init_app_state(
enable_auto_tools=args.enable_auto_tool_choice,
tool_parser=args.tool_call_parser,
tool_server=tool_server,
reasoning_parser=args.structured_outputs_config.reasoning_parser,
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage,
enable_log_outputs=args.enable_log_outputs,
log_error_stack=args.log_error_stack,
structured_outputs_config=args.structured_outputs_config,
)
if "generate" in supported_tasks
else None
2 changes: 2 additions & 0 deletions vllm/entrypoints/openai/serving_chat.py
@@ -252,6 +252,8 @@ async def create_chat_completion(
request_prompts,
engine_prompts,
) = self._make_request_with_harmony(request)
# TODO: Add gptoss reasoning parser prepare_structured_tag
# here like in serving_responses
except (ValueError, TypeError, RuntimeError, jinja2.TemplateError) as e:
logger.exception("Error in preprocessing prompt inputs")
return self.create_error_response(f"{e} {e.__cause__}")
20 changes: 16 additions & 4 deletions vllm/entrypoints/openai/serving_responses.py
@@ -52,6 +52,7 @@
from openai_harmony import Message as OpenAIHarmonyMessage

from vllm import envs
from vllm.config.structured_outputs import StructuredOutputsConfig
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import (
ChatCompletionMessageParam,
@@ -68,6 +69,7 @@
get_developer_message,
get_stop_tokens_for_assistant_actions,
get_system_message,
get_tool_names_from_messages,
get_user_message,
has_custom_tools,
parse_output_message,
@@ -134,14 +136,14 @@ def __init__(
chat_template: str | None,
chat_template_content_format: ChatTemplateContentFormatOption,
return_tokens_as_token_ids: bool = False,
reasoning_parser: str = "",
enable_auto_tools: bool = False,
tool_parser: str | None = None,
tool_server: ToolServer | None = None,
enable_prompt_tokens_details: bool = False,
enable_force_include_usage: bool = False,
enable_log_outputs: bool = False,
log_error_stack: bool = False,
structured_outputs_config: StructuredOutputsConfig | None = None,
) -> None:
super().__init__(
engine_client=engine_client,
@@ -156,8 +158,11 @@ def __init__(
self.enable_log_outputs = enable_log_outputs

self.reasoning_parser = self._get_reasoning_parser(
reasoning_parser_name=reasoning_parser
""
if not structured_outputs_config
else structured_outputs_config.reasoning_parser
)
self.structured_outputs_config = structured_outputs_config
self.enable_prompt_tokens_details = enable_prompt_tokens_details
self.enable_force_include_usage = enable_force_include_usage
self.default_sampling_params = self.model_config.get_diff_sampling_param()
@@ -392,7 +397,14 @@ async def create_responses(
else:
context = SimpleContext()

if self.reasoning_parser is not None:
# enable_in_reasoning must be true, since structural tags are
# currently used to guide the harmony chat format,
# which is technically in the reasoning, not the content
if (
self.reasoning_parser is not None
and self.structured_outputs_config
and self.structured_outputs_config.enable_in_reasoning
):
reasoning_parser = self.reasoning_parser(tokenizer)
if sampling_params.structured_outputs is None:
sampling_params.structured_outputs = StructuredOutputsParams()
@@ -401,7 +413,7 @@
sampling_params.structured_outputs.structural_tag = (
reasoning_parser.prepare_structured_tag(
sampling_params.structured_outputs.structural_tag,
self.tool_server,
get_tool_names_from_messages(messages),
)
)
generator = self._generate_with_builtin_tools(
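For context, the `create_responses` branch above only fires when the structured-outputs config both names a reasoning parser and sets `enable_in_reasoning`. A minimal sketch of such a config, assuming `StructuredOutputsConfig` accepts these fields as keyword arguments (the exact construction is an assumption, not shown in this PR):

```python
from vllm.config.structured_outputs import StructuredOutputsConfig

# Assumed keyword-argument construction; both fields appear in this PR,
# but this exact usage is an illustration, not taken from the diff.
structured_outputs_config = StructuredOutputsConfig(
    reasoning_parser="openai_gptoss",
    enable_in_reasoning=True,  # harmony channels are emitted as "reasoning"
)
```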
3 changes: 1 addition & 2 deletions vllm/reasoning/abs_reasoning_parsers.py
@@ -7,7 +7,6 @@
from functools import cached_property
from typing import TYPE_CHECKING, Any

from vllm.entrypoints.tool_server import ToolServer
from vllm.logger import init_logger
from vllm.utils.collection_utils import is_list_of
from vllm.utils.import_utils import import_from_path
@@ -119,7 +118,7 @@ def extract_reasoning_content_streaming(
def prepare_structured_tag(
self,
original_tag: str | None,
tool_server: ToolServer | None,
tool_names: set[str] | None = None,
) -> str:
"""
Instance method that is implemented for preparing the structured tag
185 changes: 131 additions & 54 deletions vllm/reasoning/gptoss_reasoning_parser.py
@@ -1,67 +1,155 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import copy
import json
from collections.abc import Sequence

from transformers import PreTrainedTokenizerBase

from vllm.entrypoints.harmony_utils import parse_chat_output
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.tool_server import ToolServer
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser, ReasoningParserManager

logger = init_logger(__name__)

no_func_reaonsing_tag = {
TRIGGERS = ["<|channel|>", "<|start|>assistant"]
Collaborator:
Wonder if we can define these as some sort of YAML or JSON file, while keeping default values for the tags. That would allow people to modify their template without changing vLLM's binary.

Contributor:
Agreed. Maybe it is best that we have a default_template and load it in here?

@frank-wei (Contributor, Nov 7, 2025):
I think this could be the default template, but only if it is at least neutral on general eval tests; otherwise, people may question it. Also, doubling down on Charlotte's suggestion: it would be good to have the flexibility of passing a JSON file.
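For illustration, a minimal sketch of the reviewers' suggestion, assuming a hypothetical `VLLM_GPTOSS_TAG_TEMPLATE` environment variable and a bundled `default_structural_tags.json`; none of these names exist in this PR:

```python
import json
import os
from pathlib import Path

# Hypothetical bundled defaults mirroring TRIGGERS / BASE_TAGS in this module;
# neither the file nor the environment variable exists in this PR.
_DEFAULT_TEMPLATE_PATH = Path(__file__).parent / "default_structural_tags.json"


def load_structural_tag_template(path: str | None = None) -> dict:
    """Load the structural-tag template from a JSON file, falling back to the
    bundled defaults so users can override the tags without rebuilding vLLM."""
    candidate = path or os.getenv("VLLM_GPTOSS_TAG_TEMPLATE")
    template_path = Path(candidate) if candidate else _DEFAULT_TEMPLATE_PATH
    with template_path.open() as f:
        template = json.load(f)
    # Light sanity check on the expected top-level shape.
    if template.get("type") != "structural_tag" or "format" not in template:
        raise ValueError(f"Invalid structural-tag template: {template_path}")
    return template
```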

BASE_TAGS = [
# Allow normal reasoning messages as the first message
{
"type": "tag",
"begin": "<|channel|>analysis",
"content": {"type": "regex", "pattern": "(?:)"},
"end": "<|message|>",
},
{
"type": "tag",
"begin": "<|channel|>commentary",
"content": {"type": "regex", "pattern": "(?:)"},
"end": "<|message|>",
},
# Allow final messages as the first message
{
"type": "tag",
"begin": "<|channel|>final",
"content": {"type": "regex", "pattern": "(?:)"},
"end": "<|message|>",
},
# Allow final messages as the last message
{
"type": "tag",
"begin": "<|start|>assistant<|channel|>final",
"content": {"type": "regex", "pattern": "(?:)"},
"end": "<|message|>",
},
# The same cases, but for when the model uses <|constrain|>json
# because the user is asking for JSON output
{
"type": "tag",
"begin": "<|channel|>final <|constrain|>json",
"content": {"type": "regex", "pattern": "(?:)"},
"end": "<|message|>",
},
{
"type": "tag",
"begin": "<|start|>assistant<|channel|>final <|constrain|>json",
"content": {"type": "regex", "pattern": "(?:)"},
"end": "<|message|>",
},
]


STRUCTURAL_TAG_TEMPLATE = {
"type": "structural_tag",
"format": {
"type": "triggered_tags",
"tags": [
{
"begin": "<|channel|>analysis<|message|>",
"content": {"type": "any_text"},
"end": "<|end|>",
}
],
"triggers": ["<|channel|>analysis"],
"triggers": ["<|channel|>", "<|start|>assistant"],
"tags": [],
"at_least_one": True,
"stop_after_first": False,
},
}


def from_builtin_tool_to_tag(tool: str) -> list[dict]:
tag = [
def create_tool_tags(
channel_name: str, tool_name: str, content_type: str | None = None
) -> list[dict]:
"""
Generate tool-specific tags based on channel name and tool name.

Args:
channel_name: The channel name (e.g., "analysis", "commentary")
tool_name: The tool name (e.g., "python", "container")
content_type: Optional explicit content type. If not provided,
inferred from channel.

Returns:
List of two tag dictionaries for first and last message positions
"""
if content_type is None:
analysis_content_type = "code"
commentary_content_type = "<|constrain|>json"
content_type = (
analysis_content_type
if channel_name == "analysis"
else commentary_content_type
)

return [
# Tool as first message
{
"begin": f"<|channel|>commentary to={tool}",
"content": {"type": "any_text"},
"end": "<|end|>",
"type": "tag",
"begin": f"<|channel|>{channel_name} to={tool_name}",
"content": {"type": "regex", "pattern": "(?:)"},
"end": f" {content_type}<|message|>",
},
# Tool as last message
# It is critical to have this case, as the model often confuses the
# `<|start|>assistant` and `<|channel|>` tags,
# so an extra case is needed to guard against that
{
"begin": f"<|channel|>analysis to={tool}",
"content": {"type": "any_text"},
"end": "<|end|>",
"type": "tag",
"begin": f"<|start|>assistant<|channel|>{channel_name} to={tool_name}",
"content": {"type": "regex", "pattern": "(?:)"},
"end": f" {content_type}<|message|>",
},
]
return tag


def tag_with_builtin_funcs(no_func_reaonsing_tag, builtin_tool_list: list[str]) -> dict:
import copy

new_tag = copy.deepcopy(no_func_reaonsing_tag)
new_tag["format"]["triggers"].append("<|channel|>commentary to=")

for tool in builtin_tool_list:
new_tag["format"]["tags"].extend(from_builtin_tool_to_tag(tool))
return new_tag
def get_structural_tags(analysis_tools: set[str], commentary_tools: set[str]):
# Start with base tags, but conditionally include commentary tag
if commentary_tools:
# Include all BASE_TAGS if there are commentary tools
tags = BASE_TAGS.copy()
else:
# Exclude commentary BASE_TAG if no commentary tools
tags = [tag for tag in BASE_TAGS if tag["begin"] != "<|channel|>commentary"]

# Add tool-specific tags for commentary channel
for tool_name in commentary_tools:
if tool_name: # Skip empty strings from split
tags.extend(create_tool_tags("commentary", tool_name))

# Add tool-specific tags for analysis channel
for tool_name in analysis_tools:
if tool_name: # Skip empty strings from split
tags.extend(create_tool_tags("analysis", tool_name))
# If commentary tools exist, also allow analysis tools on the commentary channel.
# This handles a model training issue where the model flips between channels.
# Use the "code" content type (analysis tools keep their format).
if commentary_tools:
tags.extend(create_tool_tags("commentary", tool_name, "code"))

# Build the complete structural tag
structural_tags = copy.deepcopy(STRUCTURAL_TAG_TEMPLATE)
structural_tags["format"]["tags"] = tags
return json.dumps(structural_tags)


@ReasoningParserManager.register_module("openai_gptoss")
class GptOssReasoningParser(ReasoningParser):
"""
Reasoning parser for GptOss model.

The GptOss model uses harmony to extract reasoning content and this parser
is only used for detecting the end of the reasoning content.
"""
@@ -128,30 +216,19 @@ def extract_reasoning_content(

# This function prepares the structural tag to format reasoning output
def prepare_structured_tag(
self, original_tag: str | None, tool_server: ToolServer | None
self,
original_tag: str | None,
tool_names: set[str] | None = None,
) -> str:
if original_tag is None:
if tool_server is None:
return json.dumps(no_func_reaonsing_tag)
else:
builtin_tool_list: list[str] = []
if tool_server.has_tool("browser"):
builtin_tool_list.append("browser")
if tool_server.has_tool("python"):
builtin_tool_list.append("python")
if tool_server.has_tool("container"):
builtin_tool_list.append("container")

if len(builtin_tool_list) > 0:
logger.info("Builtin_tool_list: %s", builtin_tool_list)
func_tag = json.dumps(
tag_with_builtin_funcs(no_func_reaonsing_tag, builtin_tool_list)
)
else:
logger.info("Builtin_tool_list is empty")
func_tag = json.dumps(no_func_reaonsing_tag)

return func_tag
else:
# There is a potential risk in appending to the original tag, so return it unchanged
if original_tag is not None:
return original_tag
# Easiest way to separate tools by channel for now
analysis_tools = set()
commentary_tools = set()
if tool_names:
for tool_name in tool_names:
if tool_name.startswith("functions"):
commentary_tools.add(tool_name)
else:
analysis_tools.add(tool_name)
return get_structural_tags(analysis_tools, commentary_tools)
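
As a rough end-to-end sketch of the channel split performed by `prepare_structured_tag` (standalone, with hypothetical tool names):

```python
import json

# Hypothetical tool names; "functions.*" tools are routed to the commentary
# channel, everything else (built-ins) to the analysis channel, mirroring
# prepare_structured_tag above.
tool_names = {"python", "browser.search", "functions.get_weather"}

analysis_tools = {name for name in tool_names if not name.startswith("functions")}
commentary_tools = {name for name in tool_names if name.startswith("functions")}

# With this PR installed, the structural tag would then be built via
#   get_structural_tags(analysis_tools, commentary_tools)
# and attached to sampling_params.structured_outputs.structural_tag.
print(json.dumps(
    {"analysis": sorted(analysis_tools), "commentary": sorted(commentary_tools)},
    indent=2,
))
```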