612 changes: 473 additions & 139 deletions tests/entrypoints/openai/test_gptoss_structural_tags_integration.py

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions vllm/entrypoints/harmony_utils.py
@@ -502,6 +502,35 @@ def get_stop_tokens_for_assistant_actions() -> list[int]:
return get_encoding().stop_tokens_for_assistant_actions()


def get_tool_names_from_messages(messages: list[Message]) -> set[str]:
"""
Returns a set of tool names for the purpose of guided decoding
"""
tool_names: set[str] = set()
for message in messages:
if message.author.role == Role.SYSTEM or message.author.role == Role.DEVELOPER:
assert len(message.content) == 1, (
f"SYSTEM/DEVELOPER messages should have exactly 1 content item, "
f"got {len(message.content)}"
)
message_content = message.content[0]
assert isinstance(message_content, (SystemContent, DeveloperContent)), (
f"SYSTEM/DEVELOPER message content should be SystemContent or "
f"DeveloperContent, got {type(message_content).__name__}"
)
tool_namespace_configs = (
message_content.tools.values() if message_content.tools else []
)
for tool_namespace_config in tool_namespace_configs:
# gpt-oss special case: the python tool does not need a namespace
if tool_namespace_config.name == "python":
tool_names.add("python")
continue
for tool in tool_namespace_config.tools:
tool_names.add(f"{tool_namespace_config.name}.{tool.name}")
return tool_names


def get_streamable_parser_for_assistant() -> StreamableParser:
return StreamableParser(get_encoding(), role=Role.ASSISTANT)

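For orientation, a hedged illustration of the naming convention produced by `get_tool_names_from_messages`; the concrete tool names below are hypothetical and not taken from this PR:

```python
# Hypothetical output of get_tool_names_from_messages for a conversation whose
# system/developer messages declare the python and browser built-ins plus one
# user-defined function tool (the specific names are made up).
expected_tool_names = {
    "python",                 # built-in python tool: no namespace prefix
    "browser.search",         # namespaced built-in tool
    "functions.get_weather",  # user-defined function in the "functions" namespace
}
```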
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/api_server.py
@@ -1776,11 +1776,11 @@ async def init_app_state(
enable_auto_tools=args.enable_auto_tool_choice,
tool_parser=args.tool_call_parser,
tool_server=tool_server,
reasoning_parser=args.structured_outputs_config.reasoning_parser,
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
enable_force_include_usage=args.enable_force_include_usage,
enable_log_outputs=args.enable_log_outputs,
log_error_stack=args.log_error_stack,
structured_outputs_config=args.structured_outputs_config,
)
if "generate" in supported_tasks
else None
2 changes: 2 additions & 0 deletions vllm/entrypoints/openai/serving_chat.py
@@ -252,6 +252,8 @@ async def create_chat_completion(
request_prompts,
engine_prompts,
) = self._make_request_with_harmony(request)
# TODO: Add gptoss reasoning parser prepare_structured_tag
# here like in serving_responses
except (ValueError, TypeError, RuntimeError, jinja2.TemplateError) as e:
logger.exception("Error in preprocessing prompt inputs")
return self.create_error_response(f"{e} {e.__cause__}")
20 changes: 16 additions & 4 deletions vllm/entrypoints/openai/serving_responses.py
@@ -52,6 +52,7 @@
from openai_harmony import Message as OpenAIHarmonyMessage

from vllm import envs
from vllm.config.structured_outputs import StructuredOutputsConfig
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import (
ChatCompletionMessageParam,
@@ -68,6 +69,7 @@
get_developer_message,
get_stop_tokens_for_assistant_actions,
get_system_message,
get_tool_names_from_messages,
get_user_message,
has_custom_tools,
parse_output_message,
@@ -134,14 +136,14 @@ def __init__(
chat_template: str | None,
chat_template_content_format: ChatTemplateContentFormatOption,
return_tokens_as_token_ids: bool = False,
reasoning_parser: str = "",
enable_auto_tools: bool = False,
tool_parser: str | None = None,
tool_server: ToolServer | None = None,
enable_prompt_tokens_details: bool = False,
enable_force_include_usage: bool = False,
enable_log_outputs: bool = False,
log_error_stack: bool = False,
structured_outputs_config: StructuredOutputsConfig | None = None,
) -> None:
super().__init__(
engine_client=engine_client,
@@ -156,8 +158,11 @@ def __init__(
self.enable_log_outputs = enable_log_outputs

self.reasoning_parser = self._get_reasoning_parser(
reasoning_parser_name=reasoning_parser
""
if not structured_outputs_config
else structured_outputs_config.reasoning_parser
)
self.structured_outputs_config = structured_outputs_config
self.enable_prompt_tokens_details = enable_prompt_tokens_details
self.enable_force_include_usage = enable_force_include_usage
self.default_sampling_params = self.model_config.get_diff_sampling_param()
@@ -392,7 +397,14 @@ async def create_responses(
else:
context = SimpleContext()

if self.reasoning_parser is not None:
# enable_in_reasoning must be true, since structural tags are
# currently used to guide the harmony chat format,
# which is technically in the reasoning, not the content
if (
self.reasoning_parser is not None
and self.structured_outputs_config
and self.structured_outputs_config.enable_in_reasoning
):
reasoning_parser = self.reasoning_parser(tokenizer)
if sampling_params.structured_outputs is None:
sampling_params.structured_outputs = StructuredOutputsParams()
@@ -401,7 +413,7 @@
sampling_params.structured_outputs.structural_tag = (
reasoning_parser.prepare_structured_tag(
sampling_params.structured_outputs.structural_tag,
self.tool_server,
get_tool_names_from_messages(messages),
)
)
generator = self._generate_with_builtin_tools(
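For context, the `create_responses` branch above only fires when the structured-outputs config both names a reasoning parser and sets `enable_in_reasoning`. A minimal sketch of such a config, assuming `StructuredOutputsConfig` accepts these fields as keyword arguments (the exact construction is an assumption, not shown in this PR):

```python
from vllm.config.structured_outputs import StructuredOutputsConfig

# Assumed keyword-argument construction; both fields appear in this PR,
# but this exact usage is an illustration, not taken from the diff.
structured_outputs_config = StructuredOutputsConfig(
    reasoning_parser="openai_gptoss",
    enable_in_reasoning=True,  # harmony channels are emitted as "reasoning"
)
```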
3 changes: 1 addition & 2 deletions vllm/reasoning/abs_reasoning_parsers.py
@@ -7,7 +7,6 @@
from functools import cached_property
from typing import TYPE_CHECKING, Any

from vllm.entrypoints.tool_server import ToolServer
from vllm.logger import init_logger
from vllm.utils.collection_utils import is_list_of
from vllm.utils.import_utils import import_from_path
@@ -119,7 +118,7 @@ def extract_reasoning_content_streaming(
def prepare_structured_tag(
self,
original_tag: str | None,
tool_server: ToolServer | None,
tool_names: set[str] | None = None,
) -> str:
"""
Instance method that is implemented for preparing the structured tag
185 changes: 131 additions & 54 deletions vllm/reasoning/gptoss_reasoning_parser.py
@@ -1,67 +1,155 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import copy
import json
from collections.abc import Sequence

from transformers import PreTrainedTokenizerBase

from vllm.entrypoints.harmony_utils import parse_chat_output
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.entrypoints.tool_server import ToolServer
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser, ReasoningParserManager

logger = init_logger(__name__)

no_func_reaonsing_tag = {
TRIGGERS = ["<|channel|>", "<|start|>assistant"]
Collaborator:
Wonder if we can define these as some sort of YAML or JSON file, while keeping default values for the tags. That would allow people to modify their template without changing vLLM's binary.

Contributor:
Agreed. Maybe it is best that we have a default_template and load it in here?

@frank-wei (Contributor, Nov 7, 2025):
I think this could be the default template, but only if it is at least neutral on general eval tests; otherwise, people may question it. Also, doubling down on Charlotte's suggestion: it would be good to have the flexibility of passing a JSON file.
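For illustration, a minimal sketch of the reviewers' suggestion, assuming a hypothetical `VLLM_GPTOSS_TAG_TEMPLATE` environment variable and a bundled `default_structural_tags.json`; none of these names exist in this PR:

```python
import json
import os
from pathlib import Path

# Hypothetical bundled defaults mirroring TRIGGERS / BASE_TAGS in this module;
# neither the file nor the environment variable exists in this PR.
_DEFAULT_TEMPLATE_PATH = Path(__file__).parent / "default_structural_tags.json"


def load_structural_tag_template(path: str | None = None) -> dict:
    """Load the structural-tag template from a JSON file, falling back to the
    bundled defaults so users can override the tags without rebuilding vLLM."""
    candidate = path or os.getenv("VLLM_GPTOSS_TAG_TEMPLATE")
    template_path = Path(candidate) if candidate else _DEFAULT_TEMPLATE_PATH
    with template_path.open() as f:
        template = json.load(f)
    # Light sanity check on the expected top-level shape.
    if template.get("type") != "structural_tag" or "format" not in template:
        raise ValueError(f"Invalid structural-tag template: {template_path}")
    return template
```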

BASE_TAGS = [
# Allow normal reasoning messages as the first message
{
"type": "tag",
"begin": "<|channel|>analysis",
"content": {"type": "regex", "pattern": "(?:)"},
"end": "<|message|>",
},
{
"type": "tag",
"begin": "<|channel|>commentary",
"content": {"type": "regex", "pattern": "(?:)"},
"end": "<|message|>",
},
# Allow final messages as the first message
{
"type": "tag",
"begin": "<|channel|>final",
"content": {"type": "regex", "pattern": "(?:)"},
"end": "<|message|>",
},
# Allow final messages as the last message
{
"type": "tag",
"begin": "<|start|>assistant<|channel|>final",
"content": {"type": "regex", "pattern": "(?:)"},
"end": "<|message|>",
},
# The same cases, but for when the model uses <|constrain|>json
# because the user is asking for JSON output
{
"type": "tag",
"begin": "<|channel|>final <|constrain|>json",
"content": {"type": "regex", "pattern": "(?:)"},
"end": "<|message|>",
},
{
"type": "tag",
"begin": "<|start|>assistant<|channel|>final <|constrain|>json",
"content": {"type": "regex", "pattern": "(?:)"},
"end": "<|message|>",
},
]


STRUCTURAL_TAG_TEMPLATE = {
"type": "structural_tag",
"format": {
"type": "triggered_tags",
"tags": [
{
"begin": "<|channel|>analysis<|message|>",
"content": {"type": "any_text"},
"end": "<|end|>",
}
],
"triggers": ["<|channel|>analysis"],
"triggers": ["<|channel|>", "<|start|>assistant"],
"tags": [],
"at_least_one": True,
"stop_after_first": False,
},
}


def from_builtin_tool_to_tag(tool: str) -> list[dict]:
tag = [
def create_tool_tags(
channel_name: str, tool_name: str, content_type: str | None = None
) -> list[dict]:
"""
Generate tool-specific tags based on channel name and tool name.

Args:
channel_name: The channel name (e.g., "analysis", "commentary")
tool_name: The tool name (e.g., "python", "container")
content_type: Optional explicit content type. If not provided,
inferred from channel.

Returns:
List of two tag dictionaries for first and last message positions
"""
if content_type is None:
analysis_content_type = "code"
commentary_content_type = "<|constrain|>json"
content_type = (
analysis_content_type
if channel_name == "analysis"
else commentary_content_type
)

return [
# Tool as first message
{
"begin": f"<|channel|>commentary to={tool}",
"content": {"type": "any_text"},
"end": "<|end|>",
"type": "tag",
"begin": f"<|channel|>{channel_name} to={tool_name}",
"content": {"type": "regex", "pattern": "(?:)"},
"end": f" {content_type}<|message|>",
},
# Tool as last message
# It is critical to have this case, as the model often confuses the
# `<|start|>assistant` and `<|channel|>` tags,
# so an extra case is needed to guard against that
{
"begin": f"<|channel|>analysis to={tool}",
"content": {"type": "any_text"},
"end": "<|end|>",
"type": "tag",
"begin": f"<|start|>assistant<|channel|>{channel_name} to={tool_name}",
"content": {"type": "regex", "pattern": "(?:)"},
"end": f" {content_type}<|message|>",
},
]
return tag


def tag_with_builtin_funcs(no_func_reaonsing_tag, builtin_tool_list: list[str]) -> dict:
import copy

new_tag = copy.deepcopy(no_func_reaonsing_tag)
new_tag["format"]["triggers"].append("<|channel|>commentary to=")

for tool in builtin_tool_list:
new_tag["format"]["tags"].extend(from_builtin_tool_to_tag(tool))
return new_tag
def get_structural_tags(analysis_tools: set[str], commentary_tools: set[str]):
# Start with base tags, but conditionally include commentary tag
if commentary_tools:
# Include all BASE_TAGS if there are commentary tools
tags = BASE_TAGS.copy()
else:
# Exclude commentary BASE_TAG if no commentary tools
tags = [tag for tag in BASE_TAGS if tag["begin"] != "<|channel|>commentary"]

# Add tool-specific tags for commentary channel
for tool_name in commentary_tools:
if tool_name: # Skip empty strings from split
tags.extend(create_tool_tags("commentary", tool_name))

# Add tool-specific tags for analysis channel
for tool_name in analysis_tools:
if tool_name: # Skip empty strings from split
tags.extend(create_tool_tags("analysis", tool_name))
# If commentary tools exist, also allow analysis tools on the commentary channel.
# This handles a model training issue where the model flips between channels.
# Use the "code" content type (analysis tools keep their format).
if commentary_tools:
tags.extend(create_tool_tags("commentary", tool_name, "code"))

# Build the complete structural tag
structural_tags = copy.deepcopy(STRUCTURAL_TAG_TEMPLATE)
structural_tags["format"]["tags"] = tags
return json.dumps(structural_tags)


@ReasoningParserManager.register_module("openai_gptoss")
class GptOssReasoningParser(ReasoningParser):
"""
Reasoning parser for GptOss model.

The GptOss model uses harmony to extract reasoning content and this parser
is only used for detecting the end of the reasoning content.
"""
@@ -128,30 +216,19 @@ def extract_reasoning_content(

# This function prepares the structural tag to format reasoning output
def prepare_structured_tag(
self, original_tag: str | None, tool_server: ToolServer | None
self,
original_tag: str | None,
tool_names: set[str] | None = None,
) -> str:
if original_tag is None:
if tool_server is None:
return json.dumps(no_func_reaonsing_tag)
else:
builtin_tool_list: list[str] = []
if tool_server.has_tool("browser"):
builtin_tool_list.append("browser")
if tool_server.has_tool("python"):
builtin_tool_list.append("python")
if tool_server.has_tool("container"):
builtin_tool_list.append("container")

if len(builtin_tool_list) > 0:
logger.info("Builtin_tool_list: %s", builtin_tool_list)
func_tag = json.dumps(
tag_with_builtin_funcs(no_func_reaonsing_tag, builtin_tool_list)
)
else:
logger.info("Builtin_tool_list is empty")
func_tag = json.dumps(no_func_reaonsing_tag)

return func_tag
else:
# There is a potential risk in appending to the original tag, so return it unchanged
if original_tag is not None:
return original_tag
# Easiest way to separate tools by channel for now
analysis_tools = set()
commentary_tools = set()
if tool_names:
for tool_name in tool_names:
if tool_name.startswith("functions"):
commentary_tools.add(tool_name)
else:
analysis_tools.add(tool_name)
return get_structural_tags(analysis_tools, commentary_tools)
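
As a rough end-to-end sketch of the channel split performed by `prepare_structured_tag` (standalone, with hypothetical tool names):

```python
import json

# Hypothetical tool names; "functions.*" tools are routed to the commentary
# channel, everything else (built-ins) to the analysis channel, mirroring
# prepare_structured_tag above.
tool_names = {"python", "browser.search", "functions.get_weather"}

analysis_tools = {name for name in tool_names if not name.startswith("functions")}
commentary_tools = {name for name in tool_names if name.startswith("functions")}

# With this PR installed, the structural tag would then be built via
#   get_structural_tags(analysis_tools, commentary_tools)
# and attached to sampling_params.structured_outputs.structural_tag.
print(json.dumps(
    {"analysis": sorted(analysis_tools), "commentary": sorted(commentary_tools)},
    indent=2,
))
```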