
Commit b10598d

kouroshHakha authored and epwalsh committed
[Frontend] Add request_id to the Request object so they can be controlled better via external load balancers (vllm-project#21009)
Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com>
1 parent a18bfa0 commit b10598d

3 files changed: +27 -3 lines changed

vllm/entrypoints/openai/protocol.py

Lines changed: 21 additions & 0 deletions
@@ -1007,6 +1007,13 @@ class CompletionRequest(OpenAIBaseModel):
             "default: 0). Any priority other than 0 will raise an error "
             "if the served model does not use priority scheduling."),
     )
+    request_id: str = Field(
+        default_factory=lambda: f"{random_uuid()}",
+        description=(
+            "The request_id related to this request. If the caller does "
+            "not set it, a random_uuid will be generated. This id is used "
+            "through out the inference process and return in response."),
+    )
     logits_processors: Optional[LogitsProcessors] = Field(
         default=None,
         description=(
@@ -1251,6 +1258,13 @@ class EmbeddingCompletionRequest(OpenAIBaseModel):
             "default: 0). Any priority other than 0 will raise an error "
             "if the served model does not use priority scheduling."),
     )
+    request_id: str = Field(
+        default_factory=lambda: f"{random_uuid()}",
+        description=(
+            "The request_id related to this request. If the caller does "
+            "not set it, a random_uuid will be generated. This id is used "
+            "through out the inference process and return in response."),
+    )
 
     # --8<-- [end:embedding-extra-params]
 
@@ -1302,6 +1316,13 @@ class EmbeddingChatRequest(OpenAIBaseModel):
             "default: 0). Any priority other than 0 will raise an error "
             "if the served model does not use priority scheduling."),
     )
+    request_id: str = Field(
+        default_factory=lambda: f"{random_uuid()}",
+        description=(
+            "The request_id related to this request. If the caller does "
+            "not set it, a random_uuid will be generated. This id is used "
+            "through out the inference process and return in response."),
+    )
     # --8<-- [end:chat-embedding-extra-params]
 
     @model_validator(mode="before")
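Because request_id now defaults to a random UUID but accepts a caller-supplied value, a client (or an external load balancer sitting in front of vLLM) can pin its own identifier to a request and correlate it across replicas. Below is a minimal sketch of doing so against the OpenAI-compatible completions endpoint; the server address and model name are placeholder assumptions, and any HTTP client would do.

import uuid

import requests

# The caller (or load balancer) picks the id up front so it can be matched
# against server-side logs and the response later.
my_request_id = f"lb-{uuid.uuid4()}"

resp = requests.post(
    "http://localhost:8000/v1/completions",  # assumed local vLLM server
    json={
        "model": "facebook/opt-125m",         # placeholder model name
        "prompt": "Hello, my name is",
        "max_tokens": 16,
        # Field added by this commit; carried through the inference process
        # instead of a server-generated random UUID.
        "request_id": my_request_id,
    },
)
# Per the serving_completion.py change below, the response id should carry the
# supplied id behind the "cmpl-" prefix, assuming _base_request_id falls back
# to it (see the sketch after that diff).
print(resp.json()["id"])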

vllm/entrypoints/openai/serving_completion.py

Lines changed: 3 additions & 1 deletion
@@ -113,7 +113,9 @@ async def create_completion(
             return self.create_error_response(
                 "Echo is unsupported with prompt embeds.")
 
-        request_id = f"cmpl-{self._base_request_id(raw_request)}"
+        request_id = (
+            f"cmpl-"
+            f"{self._base_request_id(raw_request, request.request_id)}")
         created_time = int(time.time())
 
         request_metadata = RequestResponseMetadata(request_id=request_id)
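The diff only shows the call site, so how _base_request_id weighs its inputs is not visible here. The sketch below illustrates the kind of precedence one would expect (proxy header, then the body's request_id, then a fresh UUID); the function name and header name are assumptions, not the actual vLLM implementation.

from typing import Optional
import uuid

from fastapi import Request  # vLLM's OpenAI-compatible server runs on FastAPI


def resolve_request_id(raw_request: Optional[Request],
                       default: Optional[str] = None) -> str:
    # Assumed precedence, for illustration only:
    #   1. an X-Request-Id header injected by a proxy or load balancer,
    #   2. the caller-supplied default (the request body's request_id),
    #   3. a freshly generated UUID.
    default = default or str(uuid.uuid4())
    if raw_request is None:
        return default
    return raw_request.headers.get("x-request-id", default)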

vllm/entrypoints/openai/serving_embedding.py

Lines changed: 3 additions & 2 deletions
@@ -163,8 +163,9 @@ async def create_embedding(
         for the API specification. This API mimics the OpenAI Embedding API.
         """
         model_name = self._get_model_name(request.model)
-        request_id = (f"{self.request_id_prefix}-"
-                      f"{self._base_request_id(raw_request)}")
+        request_id = (
+            f"{self.request_id_prefix}-"
+            f"{self._base_request_id(raw_request, request.request_id)}")
 
         ctx = EmbeddingServeContext(
             request=request,
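The embeddings endpoint follows the same pattern, folding the caller-supplied id (when present) behind the endpoint's request_id_prefix. A quick usage sketch, again with a placeholder server address and model name:

import requests

resp = requests.post(
    "http://localhost:8000/v1/embeddings",   # assumed local vLLM server
    json={
        "model": "intfloat/e5-small-v2",      # placeholder model name
        "input": "vLLM request id propagation",
        "request_id": "lb-embed-0001",        # id chosen by the caller
    },
)
# Server-side, the id becomes f"{request_id_prefix}-{resolved id}", e.g.
# "<prefix>-lb-embed-0001", which is what shows up in logs and tracing.
print(resp.status_code)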
