
Commit 71d63ed

migrate pydantic from v1 to v2 (#2531)
1 parent d75c407 commit 71d63ed

File tree

7 files changed: +26 -22 lines

  requirements-neuron.txt
  requirements-rocm.txt
  requirements.txt
  vllm/entrypoints/openai/api_server.py
  vllm/entrypoints/openai/protocol.py
  vllm/entrypoints/openai/serving_chat.py
  vllm/entrypoints/openai/serving_completion.py

requirements-neuron.txt

Lines changed: 1 addition & 1 deletion

@@ -5,5 +5,5 @@ torch-neuronx >= 2.1.0
 neuronx-cc
 fastapi
 uvicorn[standard]
-pydantic == 1.10.13 # Required for OpenAI server.
+pydantic >= 2.0 # Required for OpenAI server.
 aioprometheus[starlette]
requirements-rocm.txt

Lines changed: 1 addition & 1 deletion

@@ -9,5 +9,5 @@ tokenizers>=0.15.0
 transformers >= 4.36.0 # Required for Mixtral.
 fastapi
 uvicorn[standard]
-pydantic == 1.10.13 # Required for OpenAI server.
+pydantic >= 2.0 # Required for OpenAI server.
 aioprometheus[starlette]

requirements.txt

Lines changed: 1 addition & 1 deletion

@@ -8,5 +8,5 @@ transformers >= 4.36.0 # Required for Mixtral.
 xformers == 0.0.23.post1 # Required for CUDA 12.1.
 fastapi
 uvicorn[standard]
-pydantic == 1.10.13 # Required for OpenAI server.
+pydantic >= 2.0 # Required for OpenAI server.
 aioprometheus[starlette]
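
All three requirements files relax the pin from pydantic 1.10.13 to any 2.x release. A minimal sketch of a runtime guard for the new constraint, using only the standard library; this helper is illustrative and not part of the commit:

# Hypothetical guard: confirm the installed pydantic is v2 before importing
# modules that rely on the v2 API (model_dump, model_dump_json, ...).
from importlib.metadata import version

def assert_pydantic_v2() -> None:
    installed = version("pydantic")
    if int(installed.split(".")[0]) < 2:
        raise RuntimeError(
            f"pydantic >= 2.0 is required for the OpenAI server, found {installed}")

assert_pydantic_v2()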

vllm/entrypoints/openai/api_server.py

Lines changed: 12 additions & 6 deletions

@@ -106,7 +106,7 @@ def parse_args():
 @app.exception_handler(RequestValidationError)
 async def validation_exception_handler(_, exc):
     err = openai_serving_chat.create_error_response(message=str(exc))
-    return JSONResponse(err.dict(), status_code=HTTPStatus.BAD_REQUEST)
+    return JSONResponse(err.model_dump(), status_code=HTTPStatus.BAD_REQUEST)


 @app.get("/health")
@@ -118,30 +118,36 @@ async def health() -> Response:
 @app.get("/v1/models")
 async def show_available_models():
     models = await openai_serving_chat.show_available_models()
-    return JSONResponse(content=models.dict())
+    return JSONResponse(content=models.model_dump())


 @app.post("/v1/chat/completions")
 async def create_chat_completion(request: ChatCompletionRequest,
                                  raw_request: Request):
     generator = await openai_serving_chat.create_chat_completion(
         request, raw_request)
-    if request.stream and not isinstance(generator, ErrorResponse):
+    if isinstance(generator, ErrorResponse):
+        return JSONResponse(content=generator.model_dump(),
+                            status_code=generator.code)
+    if request.stream:
         return StreamingResponse(content=generator,
                                  media_type="text/event-stream")
     else:
-        return JSONResponse(content=generator.dict())
+        return JSONResponse(content=generator.model_dump())


 @app.post("/v1/completions")
 async def create_completion(request: CompletionRequest, raw_request: Request):
     generator = await openai_serving_completion.create_completion(
         request, raw_request)
-    if request.stream and not isinstance(generator, ErrorResponse):
+    if isinstance(generator, ErrorResponse):
+        return JSONResponse(content=generator.model_dump(),
+                            status_code=generator.code)
+    if request.stream:
         return StreamingResponse(content=generator,
                                  media_type="text/event-stream")
     else:
-        return JSONResponse(content=generator.dict())
+        return JSONResponse(content=generator.model_dump())


 if __name__ == "__main__":
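
Pydantic v2 renames v1's .dict() serializer to .model_dump(), and the handlers above now return an ErrorResponse eagerly, reusing its numeric code field as the HTTP status. A self-contained sketch of that pattern; the model mirrors only the fields visible in the protocol.py diff below, and the sample values are made up:

from http import HTTPStatus
from typing import Optional

from pydantic import BaseModel

class ErrorResponse(BaseModel):
    # Mirrors the fields shown in the protocol.py hunk; other fields omitted.
    message: str
    type: str
    param: Optional[str] = None
    code: int

err = ErrorResponse(message="prompt too long",
                    type="invalid_request_error",
                    code=HTTPStatus.BAD_REQUEST.value)

payload = err.model_dump()   # v2 replacement for v1's err.dict(); a plain dict for JSONResponse
status = err.code            # an int now, so it can be passed straight through as the status code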

vllm/entrypoints/openai/protocol.py

Lines changed: 3 additions & 4 deletions

@@ -14,7 +14,7 @@ class ErrorResponse(BaseModel):
     message: str
     type: str
     param: Optional[str] = None
-    code: Optional[str] = None
+    code: int


 class ModelPermission(BaseModel):
@@ -189,7 +189,7 @@ class CompletionStreamResponse(BaseModel):
     created: int = Field(default_factory=lambda: int(time.time()))
     model: str
     choices: List[CompletionResponseStreamChoice]
-    usage: Optional[UsageInfo]
+    usage: Optional[UsageInfo] = Field(default=None)


 class ChatMessage(BaseModel):
@@ -229,5 +229,4 @@ class ChatCompletionStreamResponse(BaseModel):
     created: int = Field(default_factory=lambda: int(time.time()))
     model: str
     choices: List[ChatCompletionResponseStreamChoice]
-    usage: Optional[UsageInfo] = Field(
-        default=None, description="data about request and response")
+    usage: Optional[UsageInfo] = Field(default=None)
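
Two changes stand out here: code becomes a required int so it can double as the HTTP status, and the usage fields gain an explicit Field(default=None). The explicit default matters because pydantic v2 no longer treats a bare Optional[...] annotation as implicitly defaulting to None; without a default the field is required. A small sketch of that difference, using stand-in models rather than the real response classes:

from typing import Optional

from pydantic import BaseModel, Field, ValidationError

class WithoutDefault(BaseModel):
    usage: Optional[int]                         # required in pydantic v2, even though it may be None

class WithDefault(BaseModel):
    usage: Optional[int] = Field(default=None)   # optional, matching what the v1 code assumed

WithDefault()                # ok: usage defaults to None
try:
    WithoutDefault()         # pydantic v2 rejects this: the field is missing
except ValidationError as exc:
    print(exc)               # reports that "usage" is required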

vllm/entrypoints/openai/serving_chat.py

Lines changed: 5 additions & 6 deletions

@@ -102,7 +102,7 @@ async def chat_completion_stream_generator(
                 created=created_time,
                 choices=[choice_data],
                 model=model_name)
-            data = chunk.json(exclude_unset=True, ensure_ascii=False)
+            data = chunk.model_dump_json(exclude_unset=True)
             yield f"data: {data}\n\n"

             # Send response to echo the input portion of the last message
@@ -125,7 +125,7 @@ async def chat_completion_stream_generator(
                 created=created_time,
                 choices=[choice_data],
                 model=model_name)
-            data = chunk.json(exclude_unset=True, ensure_ascii=False)
+            data = chunk.model_dump_json(exclude_unset=True)
             yield f"data: {data}\n\n"

         # Send response for each token for each request.n (index)
@@ -156,7 +156,7 @@ async def chat_completion_stream_generator(
                     created=created_time,
                     choices=[choice_data],
                     model=model_name)
-                data = chunk.json(exclude_unset=True, ensure_ascii=False)
+                data = chunk.model_dump_json(exclude_unset=True)
                 yield f"data: {data}\n\n"
             else:
                 # Send the finish response for each request.n only once
@@ -178,9 +178,8 @@ async def chat_completion_stream_generator(
                     model=model_name)
                 if final_usage is not None:
                     chunk.usage = final_usage
-                data = chunk.json(exclude_unset=True,
-                                  exclude_none=True,
-                                  ensure_ascii=False)
+                data = chunk.model_dump_json(exclude_unset=True,
+                                             exclude_none=True)
                 yield f"data: {data}\n\n"
                 finish_reason_sent[i] = True
             # Send the final done message after all response.n are finished
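
The streaming path swaps v1's .json(...) for v2's .model_dump_json(...). The ensure_ascii=False argument disappears because the v2 serializer does not accept it and already emits UTF-8 JSON without escaping non-ASCII characters, which is what the old flag was for. A rough sketch with a stand-in model rather than the real ChatCompletionStreamResponse:

from pydantic import BaseModel

class Chunk(BaseModel):
    # Stand-in for ChatCompletionStreamResponse; only enough fields for the demo.
    id: str
    content: str = ""

chunk = Chunk(id="chatcmpl-1", content="héllo")

# pydantic v1: chunk.json(exclude_unset=True, ensure_ascii=False)
data = chunk.model_dump_json(exclude_unset=True)

# Emitted as one server-sent event, e.g. data: {"id":"chatcmpl-1","content":"héllo"}
print(f"data: {data}\n\n", end="")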

vllm/entrypoints/openai/serving_completion.py

Lines changed: 3 additions & 3 deletions

@@ -74,7 +74,7 @@ async def completion_stream_generator(
                     logprobs=logprobs,
                     finish_reason=finish_reason,
                 )
-            ]).json(exclude_unset=True, ensure_ascii=False)
+            ]).model_dump_json(exclude_unset=True)
             yield f"data: {response_json}\n\n"

             if output.finish_reason is not None:
@@ -99,7 +99,7 @@ async def completion_stream_generator(
                 )
             ],
             usage=final_usage,
-        ).json(exclude_unset=True, ensure_ascii=False)
+        ).model_dump_json(exclude_unset=True)
         yield f"data: {response_json}\n\n"

     yield "data: [DONE]\n\n"
@@ -279,7 +279,7 @@ async def create_completion(self, request: CompletionRequest,
         # When user requests streaming but we don't stream, we still need to
         # return a streaming response with a single event.
         if request.stream:
-            response_json = response.json(ensure_ascii=False)
+            response_json = response.model_dump_json()

            async def fake_stream_generator() -> AsyncGenerator[str, None]:
                yield f"data: {response_json}\n\n"
