Skip to content

Commit 73df49e

Browse files
authored
[gpt-oss][1a] create_responses stream outputs BaseModel type, api server is SSE still (#24759)
Signed-off-by: Andrew Xia <axia@meta.com>
1 parent 25aba2b commit 73df49e

File tree

2 files changed

+90
-71
lines changed

2 files changed

+90
-71
lines changed

vllm/entrypoints/openai/api_server.py

Lines changed: 21 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
import tempfile
1616
import uuid
1717
from argparse import Namespace
18-
from collections.abc import AsyncIterator, Awaitable
18+
from collections.abc import AsyncGenerator, AsyncIterator, Awaitable
1919
from contextlib import asynccontextmanager
2020
from functools import partial
2121
from http import HTTPStatus
@@ -29,6 +29,7 @@
2929
from fastapi.exceptions import RequestValidationError
3030
from fastapi.middleware.cors import CORSMiddleware
3131
from fastapi.responses import JSONResponse, Response, StreamingResponse
32+
from openai import BaseModel
3233
from prometheus_client import make_asgi_app
3334
from prometheus_fastapi_instrumentator import Instrumentator
3435
from starlette.concurrency import iterate_in_threadpool
@@ -577,6 +578,18 @@ async def show_version():
577578
return JSONResponse(content=ver)
578579

579580

581+
async def _convert_stream_to_sse_events(
582+
generator: AsyncGenerator[BaseModel,
583+
None]) -> AsyncGenerator[str, None]:
584+
"""Convert the generator to a stream of events in SSE format"""
585+
async for event in generator:
586+
event_type = getattr(event, 'type', 'unknown')
587+
# https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format
588+
event_data = (f"event: {event_type}\n"
589+
f"data: {event.model_dump_json(indent=None)}\n\n")
590+
yield event_data
591+
592+
580593
@router.post("/v1/responses",
581594
dependencies=[Depends(validate_json_request)],
582595
responses={
@@ -612,7 +625,9 @@ async def create_responses(request: ResponsesRequest, raw_request: Request):
612625
status_code=generator.error.code)
613626
elif isinstance(generator, ResponsesResponse):
614627
return JSONResponse(content=generator.model_dump())
615-
return StreamingResponse(content=generator, media_type="text/event-stream")
628+
629+
return StreamingResponse(content=_convert_stream_to_sse_events(generator),
630+
media_type="text/event-stream")
616631

617632

618633
@router.get("/v1/responses/{response_id}")
@@ -640,10 +655,10 @@ async def retrieve_responses(
640655
if isinstance(response, ErrorResponse):
641656
return JSONResponse(content=response.model_dump(),
642657
status_code=response.error.code)
643-
elif stream:
644-
return StreamingResponse(content=response,
645-
media_type="text/event-stream")
646-
return JSONResponse(content=response.model_dump())
658+
elif isinstance(response, ResponsesResponse):
659+
return JSONResponse(content=response.model_dump())
660+
return StreamingResponse(content=_convert_stream_to_sse_events(response),
661+
media_type="text/event-stream")
647662

648663

649664
@router.post("/v1/responses/{response_id}/cancel")

0 commit comments

Comments (0)