|
1 | 1 | # SPDX-License-Identifier: Apache-2.0 |
2 | 2 | # SPDX-FileCopyrightText: Copyright contributors to the vLLM project |
3 | 3 | import datetime |
| 4 | +import json |
4 | 5 | from collections.abc import Iterable, Sequence |
5 | 6 | from typing import Literal, Optional, Union |
6 | 7 |
|
7 | | -from openai.types.responses import ResponseFunctionToolCall, ResponseOutputItem |
| 8 | +from openai.types.responses import (ResponseFunctionToolCall, |
| 9 | + ResponseOutputItem, ResponseOutputMessage, |
| 10 | + ResponseOutputText, ResponseReasoningItem) |
| 11 | +from openai.types.responses.response_function_web_search import ( |
| 12 | + ActionFind, ActionOpenPage, ActionSearch, ResponseFunctionWebSearch) |
| 13 | +from openai.types.responses.response_reasoning_item import ( |
| 14 | + Content as ResponseReasoningTextContent) |
8 | 15 | from openai.types.responses.tool import Tool |
9 | 16 | from openai_harmony import (Author, Conversation, DeveloperContent, |
10 | 17 | HarmonyEncodingName, Message, ReasoningEffort, |
11 | 18 | Role, StreamableParser, SystemContent, TextContent, |
12 | 19 | ToolDescription, load_harmony_encoding) |
13 | 20 |
|
14 | | -from vllm.entrypoints.openai.protocol import (ResponseInputOutputItem, |
15 | | - ResponseReasoningItem) |
| 21 | +from vllm.entrypoints.openai.protocol import ResponseInputOutputItem |
| 22 | +from vllm.utils import random_uuid |
16 | 23 |
|
17 | 24 | REASONING_EFFORT = { |
18 | 25 | "high": ReasoningEffort.HIGH, |
@@ -160,6 +167,146 @@ def render_for_completion(messages: list[Message]) -> list[int]: |
160 | 167 | return token_ids |
161 | 168 |
|
162 | 169 |
|
def parse_output_message(message: Message) -> list[ResponseOutputItem]:
    """Parse a Harmony message into a list of Responses API output items.

    Messages authored by non-assistant roles (e.g. tool results fed back to
    the assistant) produce no output items; this matches OpenAI's behavior
    on models like o4-mini.

    Args:
        message: A Harmony ``Message`` produced during generation.

    Returns:
        Zero or more output items translated from the message, chosen by
        its recipient (browser tool calls) or channel (``analysis``,
        ``commentary``, ``final``).

    Raises:
        ValueError: If a browser message has an unexpected content count or
            unknown action, if a commentary message has a missing or
            unknown recipient, or if the channel is unknown.
    """
    if message.author.role != "assistant":
        # This is a message from a tool to the assistant (e.g., search result).
        # Don't include it in the final output for now. This aligns with
        # OpenAI's behavior on models like o4-mini.
        return []

    output_items: list[ResponseOutputItem] = []
    recipient = message.recipient
    if recipient is not None and recipient.startswith("browser."):
        if len(message.content) != 1:
            raise ValueError("Invalid number of contents in browser message")
        content = message.content[0]
        # The tool-call arguments are serialized as JSON in the content text.
        browser_call = json.loads(content.text)
        # TODO: translate to url properly!
        if recipient == "browser.search":
            action = ActionSearch(
                query=f"cursor:{browser_call.get('query', '')}", type="search")
        elif recipient == "browser.open":
            action = ActionOpenPage(
                url=f"cursor:{browser_call.get('url', '')}", type="open_page")
        elif recipient == "browser.find":
            action = ActionFind(pattern=browser_call["pattern"],
                                url=f"cursor:{browser_call.get('url', '')}",
                                type="find")
        else:
            raise ValueError(f"Unknown browser action: {recipient}")
        web_search_item = ResponseFunctionWebSearch(
            id=f"ws_{random_uuid()}",
            action=action,
            status="completed",
            type="web_search_call",
        )
        output_items.append(web_search_item)
    elif message.channel == "analysis":
        # Chain-of-thought content is surfaced as reasoning items.
        for content in message.content:
            reasoning_item = ResponseReasoningItem(
                id=f"rs_{random_uuid()}",
                summary=[],
                type="reasoning",
                content=[
                    ResponseReasoningTextContent(text=content.text,
                                                 type="reasoning_text")
                ],
                status=None,
            )
            output_items.append(reasoning_item)
    elif message.channel == "commentary":
        # Fix: a commentary message can carry no recipient; the original
        # `recipient.startswith(...)` would raise AttributeError on None
        # instead of a descriptive error like the other branches.
        if recipient is None:
            raise ValueError("Unknown recipient: None")
        if recipient.startswith("functions."):
            function_name = recipient.split(".")[-1]
            for content in message.content:
                # Share one uuid between call_id and id so the pair is
                # correlatable.
                random_id = random_uuid()
                response_item = ResponseFunctionToolCall(
                    arguments=content.text,
                    call_id=f"call_{random_id}",
                    type="function_call",
                    name=function_name,
                    id=f"ft_{random_id}",
                )
                output_items.append(response_item)
        elif recipient.startswith("python") or recipient.startswith("browser"):
            for content in message.content:
                # NOTE(review): `text=` is not a declared field on
                # ResponseReasoningItem (which uses `content=[...]` above);
                # confirm the SDK model accepts this extra field.
                reasoning_item = ResponseReasoningItem(
                    id=f"rs_{random_uuid()}",
                    summary=[],
                    type="reasoning",
                    text=content.text,
                    status=None,
                )
                output_items.append(reasoning_item)
        else:
            raise ValueError(f"Unknown recipient: {recipient}")
    elif message.channel == "final":
        contents = []
        for content in message.content:
            output_text = ResponseOutputText(
                text=content.text,
                annotations=[],  # TODO
                type="output_text",
                logprobs=None,  # TODO
            )
            contents.append(output_text)
        text_item = ResponseOutputMessage(
            id=f"msg_{random_uuid()}",
            content=contents,
            role=message.author.role,
            status="completed",
            type="message",
        )
        output_items.append(text_item)
    else:
        raise ValueError(f"Unknown channel: {message.channel}")
    return output_items
| 267 | + |
| 268 | + |
def parse_remaining_state(
        parser: StreamableParser) -> list[ResponseOutputItem]:
    """Convert any unfinished parser content into output response items.

    When generation stops mid-message, the streaming parser may still hold
    partial assistant text. Partial ``analysis`` content becomes a
    reasoning item and partial ``final`` content becomes an output
    message; empty content, non-assistant roles, in-flight browser calls,
    and any other channel yield nothing.
    """
    pending_text = parser.current_content
    if not pending_text or parser.current_role != Role.ASSISTANT:
        return []
    recipient = parser.current_recipient
    if recipient is not None and recipient.startswith("browser."):
        return []

    channel = parser.current_channel
    if channel == "analysis":
        return [
            ResponseReasoningItem(
                id=f"rs_{random_uuid()}",
                summary=[],
                type="reasoning",
                content=[
                    ResponseReasoningTextContent(text=pending_text,
                                                 type="reasoning_text")
                ],
                status=None,
            )
        ]
    if channel == "final":
        partial_message = ResponseOutputText(
            text=pending_text,
            annotations=[],  # TODO
            type="output_text",
            logprobs=None,  # TODO
        )
        return [
            ResponseOutputMessage(
                id=f"msg_{random_uuid()}",
                content=[partial_message],
                role="assistant",
                status="completed",
                type="message",
            )
        ]
    return []
| 308 | + |
| 309 | + |
def get_stop_tokens_for_assistant_actions() -> list[int]:
    """Return the token IDs that terminate an assistant action turn."""
    encoding = get_encoding()
    return encoding.stop_tokens_for_assistant_actions()
165 | 312 |
|
|
0 commit comments