Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ async def ping(raw_request: Request) -> Response:
return await health(raw_request)


@router.post("/tokenize", dependencies=[Depends(validate_json_request)])
@router.post("/tokenize", dependencies=[Depends(validate_json_request)], responses={500: {"model": ErrorResponse}})
@with_cancellation
async def tokenize(request: TokenizeRequest, raw_request: Request):
handler = tokenization(raw_request)
Expand All @@ -434,7 +434,7 @@ async def tokenize(request: TokenizeRequest, raw_request: Request):
assert_never(generator)


@router.post("/detokenize", dependencies=[Depends(validate_json_request)])
@router.post("/detokenize", dependencies=[Depends(validate_json_request)], responses={500: {"model": ErrorResponse}})
@with_cancellation
async def detokenize(request: DetokenizeRequest, raw_request: Request):
handler = tokenization(raw_request)
Expand Down Expand Up @@ -464,7 +464,8 @@ async def show_version():


@router.post("/v1/chat/completions",
dependencies=[Depends(validate_json_request)])
dependencies=[Depends(validate_json_request)],
responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}})
@with_cancellation
@load_aware_call
async def create_chat_completion(request: ChatCompletionRequest,
Expand All @@ -486,7 +487,7 @@ async def create_chat_completion(request: ChatCompletionRequest,
return StreamingResponse(content=generator, media_type="text/event-stream")


@router.post("/v1/completions", dependencies=[Depends(validate_json_request)])
@router.post("/v1/completions", dependencies=[Depends(validate_json_request)], responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}})
@with_cancellation
@load_aware_call
async def create_completion(request: CompletionRequest, raw_request: Request):
Expand Down Expand Up @@ -601,7 +602,7 @@ async def create_score_v1(request: ScoreRequest, raw_request: Request):
return await create_score(request, raw_request)


@router.post("/v1/audio/transcriptions")
@router.post("/v1/audio/transcriptions", responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}})
@with_cancellation
@load_aware_call
async def create_transcriptions(request: Annotated[TranscriptionRequest,
Expand Down Expand Up @@ -735,7 +736,7 @@ async def is_sleeping(raw_request: Request):
return JSONResponse(content={"is_sleeping": is_sleeping})


@router.post("/invocations", dependencies=[Depends(validate_json_request)])
@router.post("/invocations", dependencies=[Depends(validate_json_request)], responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}})
async def invocations(raw_request: Request):
"""
For SageMaker, routes requests to other handlers based on model `task`.
Expand Down