Skip to content

Commit 57d5887

Browse files
heheda12345 and epwalsh
authored and committed
[gpt-oss] Generate ResponseOutputItem from Harmony Message (vllm-project#22410)
Signed-off-by: Chen Zhang <zhangch99@outlook.com>
1 parent ffcdba4 commit 57d5887

File tree

4 files changed

+292
-82
lines changed

4 files changed

+292
-82
lines changed

tests/v1/entrypoints/openai/responses/test_basic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ async def test_simple_input(client: openai.AsyncOpenAI):
1717

1818
# Whether the output contains the reasoning.
1919
assert outputs[0].type == "reasoning"
20-
assert outputs[0].text != ""
20+
assert outputs[0].content[0].text != ""
2121

2222

2323
@pytest.mark.asyncio

vllm/entrypoints/harmony_utils.py

Lines changed: 150 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,25 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
import datetime
4+
import json
45
from collections.abc import Iterable, Sequence
56
from typing import Literal, Optional, Union
67

7-
from openai.types.responses import ResponseFunctionToolCall, ResponseOutputItem
8+
from openai.types.responses import (ResponseFunctionToolCall,
9+
ResponseOutputItem, ResponseOutputMessage,
10+
ResponseOutputText, ResponseReasoningItem)
11+
from openai.types.responses.response_function_web_search import (
12+
ActionFind, ActionOpenPage, ActionSearch, ResponseFunctionWebSearch)
13+
from openai.types.responses.response_reasoning_item import (
14+
Content as ResponseReasoningTextContent)
815
from openai.types.responses.tool import Tool
916
from openai_harmony import (Author, Conversation, DeveloperContent,
1017
HarmonyEncodingName, Message, ReasoningEffort,
1118
Role, StreamableParser, SystemContent, TextContent,
1219
ToolDescription, load_harmony_encoding)
1320

14-
from vllm.entrypoints.openai.protocol import (ResponseInputOutputItem,
15-
ResponseReasoningItem)
21+
from vllm.entrypoints.openai.protocol import ResponseInputOutputItem
22+
from vllm.utils import random_uuid
1623

1724
REASONING_EFFORT = {
1825
"high": ReasoningEffort.HIGH,
@@ -160,6 +167,146 @@ def render_for_completion(messages: list[Message]) -> list[int]:
160167
return token_ids
161168

162169

170+
def parse_output_message(message: Message) -> list[ResponseOutputItem]:
    """Parse a Harmony message into a list of Responses API output items.

    Messages authored by non-assistant roles (e.g. tool results returned
    to the model) produce no output items; this aligns with OpenAI's
    behavior on models like o4-mini.

    Args:
        message: A Harmony ``Message`` produced during generation.

    Returns:
        Zero or more ``ResponseOutputItem`` objects (web-search calls,
        reasoning items, function tool calls, or output messages).

    Raises:
        ValueError: If a browser message has an unexpected number of
            content parts, the browser action is unknown, the commentary
            recipient is missing or unknown, or the channel is unknown.
    """
    if message.author.role != "assistant":
        # This is a message from a tool to the assistant (e.g., search
        # result). Don't include it in the final output for now. This
        # aligns with OpenAI's behavior on models like o4-mini.
        return []

    output_items: list[ResponseOutputItem] = []
    recipient = message.recipient
    if recipient is not None and recipient.startswith("browser."):
        if len(message.content) != 1:
            raise ValueError("Invalid number of contents in browser message")
        content = message.content[0]
        browser_call = json.loads(content.text)
        # TODO: translate to url properly!
        if recipient == "browser.search":
            action = ActionSearch(
                query=f"cursor:{browser_call.get('query', '')}", type="search")
        elif recipient == "browser.open":
            action = ActionOpenPage(
                url=f"cursor:{browser_call.get('url', '')}", type="open_page")
        elif recipient == "browser.find":
            action = ActionFind(pattern=browser_call["pattern"],
                                url=f"cursor:{browser_call.get('url', '')}",
                                type="find")
        else:
            raise ValueError(f"Unknown browser action: {recipient}")
        web_search_item = ResponseFunctionWebSearch(
            id=f"ws_{random_uuid()}",
            action=action,
            status="completed",
            type="web_search_call",
        )
        output_items.append(web_search_item)
    elif message.channel == "analysis":
        for content in message.content:
            reasoning_item = ResponseReasoningItem(
                id=f"rs_{random_uuid()}",
                summary=[],
                type="reasoning",
                content=[
                    ResponseReasoningTextContent(text=content.text,
                                                 type="reasoning_text")
                ],
                status=None,
            )
            output_items.append(reasoning_item)
    elif message.channel == "commentary":
        # FIX: guard against a missing recipient before dereferencing it;
        # previously a commentary message without a recipient raised
        # AttributeError instead of a descriptive ValueError.
        if recipient is None:
            raise ValueError(f"Unknown recipient: {recipient}")
        if recipient.startswith("functions."):
            function_name = recipient.split(".")[-1]
            for content in message.content:
                random_id = random_uuid()
                response_item = ResponseFunctionToolCall(
                    arguments=content.text,
                    call_id=f"call_{random_id}",
                    type="function_call",
                    name=function_name,
                    id=f"ft_{random_id}",
                )
                output_items.append(response_item)
        elif recipient.startswith("python") or recipient.startswith("browser"):
            for content in message.content:
                # FIX: build the reasoning payload the same way as the
                # "analysis" branch above. ResponseReasoningItem declares
                # a `content` list of reasoning_text parts; the previous
                # `text=content.text` kwarg is not a declared field.
                reasoning_item = ResponseReasoningItem(
                    id=f"rs_{random_uuid()}",
                    summary=[],
                    type="reasoning",
                    content=[
                        ResponseReasoningTextContent(text=content.text,
                                                     type="reasoning_text")
                    ],
                    status=None,
                )
                output_items.append(reasoning_item)
        else:
            raise ValueError(f"Unknown recipient: {recipient}")
    elif message.channel == "final":
        contents = []
        for content in message.content:
            output_text = ResponseOutputText(
                text=content.text,
                annotations=[],  # TODO
                type="output_text",
                logprobs=None,  # TODO
            )
            contents.append(output_text)
        text_item = ResponseOutputMessage(
            id=f"msg_{random_uuid()}",
            content=contents,
            role=message.author.role,
            status="completed",
            type="message",
        )
        output_items.append(text_item)
    else:
        raise ValueError(f"Unknown channel: {message.channel}")
    return output_items
267+
268+
269+
def parse_remaining_state(
        parser: StreamableParser) -> list[ResponseOutputItem]:
    """Salvage any unfinished assistant text left in *parser*.

    When generation stops mid-message, the streaming parser may hold
    partial content that never became a complete Harmony message. That
    leftover text is surfaced as a reasoning item (``analysis`` channel)
    or an output message (``final`` channel); partial browser tool calls
    and anything on other channels are dropped.
    """
    leftover = parser.current_content
    if not leftover:
        return []
    if parser.current_role != Role.ASSISTANT:
        return []
    recipient = parser.current_recipient
    if recipient is not None and recipient.startswith("browser."):
        # Incomplete browser calls are not surfaced to the client.
        return []

    channel = parser.current_channel
    if channel == "analysis":
        text_part = ResponseReasoningTextContent(text=leftover,
                                                 type="reasoning_text")
        return [
            ResponseReasoningItem(
                id=f"rs_{random_uuid()}",
                summary=[],
                type="reasoning",
                content=[text_part],
                status=None,
            )
        ]
    if channel == "final":
        body = ResponseOutputText(
            text=leftover,
            annotations=[],  # TODO
            type="output_text",
            logprobs=None,  # TODO
        )
        return [
            ResponseOutputMessage(
                id=f"msg_{random_uuid()}",
                content=[body],
                role="assistant",
                status="completed",
                type="message",
            )
        ]
    return []
308+
309+
163310
def get_stop_tokens_for_assistant_actions() -> list[int]:
164311
return get_encoding().stop_tokens_for_assistant_actions()
165312

vllm/entrypoints/openai/protocol.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
# yapf: enable
2020
from openai.types.responses import (ResponseFunctionToolCall,
2121
ResponseInputItemParam, ResponseOutputItem,
22-
ResponseOutputMessage, ResponsePrompt,
23-
ResponseStatus, ResponseTextConfig)
22+
ResponsePrompt, ResponseStatus,
23+
ResponseTextConfig)
2424
from openai.types.responses.response import ToolChoice
2525
from openai.types.responses.tool import Tool
2626
from openai.types.shared import Metadata, Reasoning
@@ -1729,13 +1729,20 @@ class TranscriptionStreamResponse(OpenAIBaseModel):
17291729
usage: Optional[UsageInfo] = Field(default=None)
17301730

17311731

1732-
class ResponseReasoningItem(OpenAIBaseModel):
1733-
id: str = Field(default_factory=lambda: f"rs_{random_uuid()}")
1734-
text: str
1735-
summary: list = Field(default_factory=list)
1736-
type: Literal["reasoning"] = "reasoning"
1737-
encrypted_content: Optional[str] = None
1738-
status: Optional[Literal["in_progress", "completed", "incomplete"]]
1732+
class InputTokensDetails(OpenAIBaseModel):
    # Breakdown of the prompt-side token count.
    # NOTE(review): presumably the number of input tokens served from a
    # cache (mirrors OpenAI's `input_tokens_details.cached_tokens`) —
    # confirm against the caller that populates it.
    cached_tokens: int


class OutputTokensDetails(OpenAIBaseModel):
    # Breakdown of the completion-side token count.
    # NOTE(review): presumably the number of generated tokens spent on
    # reasoning (mirrors OpenAI's `output_tokens_details.reasoning_tokens`)
    # — confirm against the caller that populates it.
    reasoning_tokens: int


class ResponseUsage(OpenAIBaseModel):
    # Token accounting for a Responses API call, shaped like OpenAI's
    # `response.usage` object (replaces the generic UsageInfo on
    # ResponsesResponse).
    input_tokens: int
    input_tokens_details: InputTokensDetails
    output_tokens: int
    output_tokens_details: OutputTokensDetails
    total_tokens: int
17391746

17401747

17411748
class ResponsesResponse(OpenAIBaseModel):
@@ -1747,7 +1754,7 @@ class ResponsesResponse(OpenAIBaseModel):
17471754
metadata: Optional[Metadata] = None
17481755
model: str
17491756
object: Literal["response"] = "response"
1750-
output: list[Union[ResponseOutputMessage, ResponseReasoningItem]]
1757+
output: list[ResponseOutputItem]
17511758
parallel_tool_calls: bool
17521759
temperature: float
17531760
tool_choice: ToolChoice
@@ -1764,7 +1771,7 @@ class ResponsesResponse(OpenAIBaseModel):
17641771
text: Optional[ResponseTextConfig] = None
17651772
top_logprobs: int
17661773
truncation: Literal["auto", "disabled"]
1767-
usage: Optional[UsageInfo] = None
1774+
usage: Optional[ResponseUsage] = None
17681775
user: Optional[str] = None
17691776

17701777
@classmethod
@@ -1776,7 +1783,7 @@ def from_request(
17761783
created_time: int,
17771784
output: list[ResponseOutputItem],
17781785
status: ResponseStatus,
1779-
usage: Optional[UsageInfo] = None,
1786+
usage: Optional[ResponseUsage] = None,
17801787
) -> "ResponsesResponse":
17811788
return cls(
17821789
id=request.request_id,

0 commit comments

Comments
 (0)