|
99 | 99 | log_non_default_args, with_cancellation) |
100 | 100 | from vllm.logger import init_logger |
101 | 101 | from vllm.reasoning import ReasoningParserManager |
| 102 | +from vllm.sampling_params import SamplingParams |
| 103 | +from vllm.transformers_utils.config import ( |
| 104 | + maybe_register_config_serialize_by_value) |
102 | 105 | from vllm.transformers_utils.tokenizer import MistralTokenizer |
103 | 106 | from vllm.usage.usage_lib import UsageContext |
104 | 107 | from vllm.utils import (Device, FlexibleArgumentParser, decorate_logs, |
105 | | - is_valid_ipv6_address, set_ulimit) |
| 108 | + is_valid_ipv6_address, random_uuid, set_ulimit) |
106 | 109 | from vllm.v1.engine.exceptions import EngineDeadError |
107 | 110 | from vllm.v1.metrics.prometheus import get_prometheus_registry |
108 | 111 | from vllm.version import __version__ as VLLM_VERSION |
@@ -341,10 +344,18 @@ def engine_client(request: Request) -> EngineClient: |
341 | 344 |
|
342 | 345 |
|
@router.get("/health", response_class=Response)
async def health(
    raw_request: Request, generate: Optional[bool] = Query(False)) -> Response:
    """Health check.

    Responds 200 when the engine reports healthy and 503 when the engine
    has died (``EngineDeadError``). When the ``generate`` query parameter
    is truthy, additionally exercises the full generation path with a
    minimal request: prompt ``"Hi"``, temperature 0, max 2 tokens.
    """
    try:
        await engine_client(raw_request).check_health()
        if generate:
            # End-to-end probe: run a tiny generation and drain its
            # output stream; any engine failure surfaces as an exception.
            probe_params = SamplingParams(temperature=0, max_tokens=2)
            probe_id = random_uuid()
            stream = engine_client(raw_request).generate(
                "Hi", probe_params, probe_id)
            async for _ in stream:
                pass
        return Response(status_code=200)
    except EngineDeadError:
        return Response(status_code=503)
|
0 commit comments