 from fastapi import Request
 from openai.types.responses import ResponseOutputMessage, ResponseOutputText
 
+from vllm import envs
 from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.chat_utils import (ChatCompletionMessageParam,
@@ -89,15 +90,17 @@ def __init__(
             logger.info("Using default chat sampling params from %s: %s",
                         source, self.default_sampling_params)
 
+        # False by default.
+        self.enable_store = envs.VLLM_ENABLE_RESPONSES_API_STORE
         # HACK(woosuk): This is a hack. We should use a better store.
-        # FIXME: This causes a memory leak since we never remove responses
-        # from the store.
+        # FIXME: If enable_store=True, this may cause a memory leak since we
+        # never remove responses from the store.
         self.response_store: dict[str, ResponsesResponse] = {}
         self.response_store_lock = asyncio.Lock()
 
         # HACK(woosuk): This is a hack. We should use a better store.
-        # FIXME: This causes a memory leak since we never remove messages
-        # from the store.
+        # FIXME: If enable_store=True, this may cause a memory leak since we
+        # never remove messages from the store.
        self.msg_store: dict[str, list[ChatCompletionMessageParam]] = {}
 
         self.background_tasks: dict[str, asyncio.Task] = {}
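For context, a plausible sketch of how the new flag could be declared in `vllm/envs.py`, following the lazily evaluated `os.getenv` pattern that module uses for other boolean flags. This wiring is an assumption for illustration, not part of the diff:

```python
# Hypothetical sketch of the flag's definition in vllm/envs.py, assuming
# the module's usual dict-of-lambdas pattern for environment variables.
import os

environment_variables = {
    # Disabled by default; any non-zero value enables the in-memory
    # response/message stores used by the Responses API.
    "VLLM_ENABLE_RESPONSES_API_STORE":
        lambda: bool(int(os.getenv("VLLM_ENABLE_RESPONSES_API_STORE", "0"))),
}
```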
@@ -118,6 +121,10 @@ async def create_responses(
         if self.engine_client.errored:
             raise self.engine_client.dead_error
 
+        # If store is not enabled, return an error.
+        if request.store and not self.enable_store:
+            return self._make_store_not_supported_error()
+
         # Handle the previous response ID.
         prev_response_id = request.previous_response_id
         if prev_response_id is not None:
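Since `store` defaults to `True` in the Responses API, existing clients will hit this new error path unless they opt out explicitly. A minimal client-side sketch; the server address and model name are assumptions:

```python
from openai import OpenAI

# Assumed local vLLM server and model; adjust to your deployment.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# `store` defaults to True, so it must be disabled explicitly unless the
# server was started with VLLM_ENABLE_RESPONSES_API_STORE=1.
resp = client.responses.create(
    model="meta-llama/Llama-3.1-8B-Instruct",
    input="Hello!",
    store=False,
)
print(resp.output_text)
```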
@@ -456,3 +463,13 @@ def _make_not_found_error(self, response_id: str) -> ErrorResponse:
             message=f"Response with id '{response_id}' not found.",
             status_code=HTTPStatus.NOT_FOUND,
         )
+
+    def _make_store_not_supported_error(self) -> ErrorResponse:
+        return self.create_error_response(
+            err_type="invalid_request_error",
+            message=("`store=True` (default) is not supported. Please set "
+                     "`store=False` in Responses API or set "
+                     "`VLLM_ENABLE_RESPONSES_API_STORE=1` in the env var when "
+                     "starting the vLLM server."),
+            status_code=HTTPStatus.BAD_REQUEST,
+        )
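And what a caller sees when `store` is left at its default against a server started without the env var. A hedged sketch using raw HTTP, since the exact error payload shape depends on vLLM's `ErrorResponse` model; the address and model name are assumptions:

```python
import httpx

# Assumed local server started WITHOUT VLLM_ENABLE_RESPONSES_API_STORE=1.
# `store` is omitted, so it defaults to True and should be rejected.
r = httpx.post(
    "http://localhost:8000/v1/responses",
    json={"model": "meta-llama/Llama-3.1-8B-Instruct", "input": "Hello!"},
)
print(r.status_code)  # expected: 400 (BAD_REQUEST)
print(r.json())       # error body produced by _make_store_not_supported_error
```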