Skip to content

Commit 0388807

Browse files
committed
[V1] add optional `generate` parameter to the health API
Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
1 parent 392edee commit 0388807

File tree

1 file changed

+13
-2
lines changed

1 file changed

+13
-2
lines changed

vllm/entrypoints/openai/api_server.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,13 @@
9999
log_non_default_args, with_cancellation)
100100
from vllm.logger import init_logger
101101
from vllm.reasoning import ReasoningParserManager
102+
from vllm.sampling_params import SamplingParams
103+
from vllm.transformers_utils.config import (
104+
maybe_register_config_serialize_by_value)
102105
from vllm.transformers_utils.tokenizer import MistralTokenizer
103106
from vllm.usage.usage_lib import UsageContext
104107
from vllm.utils import (Device, FlexibleArgumentParser, decorate_logs,
105-
is_valid_ipv6_address, set_ulimit)
108+
is_valid_ipv6_address, random_uuid, set_ulimit)
106109
from vllm.v1.engine.exceptions import EngineDeadError
107110
from vllm.v1.metrics.prometheus import get_prometheus_registry
108111
from vllm.version import __version__ as VLLM_VERSION
@@ -341,10 +344,18 @@ def engine_client(request: Request) -> EngineClient:
341344

342345

343346
@router.get("/health", response_class=Response)
async def health(
    raw_request: Request, generate: Optional[bool] = Query(False)) -> Response:
    """Health check endpoint.

    Returns HTTP 200 when the engine reports healthy and HTTP 503 when the
    engine is dead. If the ``generate`` query parameter is truthy, the check
    additionally runs a tiny greedy generation (2 tokens from the prompt
    "Hi") to verify the engine can serve requests end-to-end, not merely
    that it is alive.

    Raises:
        Anything other than ``EngineDeadError`` raised by the health check or
        the probe generation propagates to FastAPI's default error handling.
    """
    try:
        # Resolve the engine client once and reuse it for both the health
        # check and the optional generation probe (the original resolved it
        # twice per request).
        client = engine_client(raw_request)
        await client.check_health()
        if generate:
            # Deterministic minimal probe: temperature=0 (greedy) and only
            # two tokens, to keep the endpoint cheap.
            prompt = "Hi"
            sampling_params = SamplingParams(temperature=0, max_tokens=2)
            request_id = random_uuid()
            # Drain the async generator; we only care that generation
            # completes without raising.
            async for _ in client.generate(prompt, sampling_params,
                                           request_id):
                pass
        return Response(status_code=200)
    except EngineDeadError:
        return Response(status_code=503)

0 commit comments

Comments
 (0)