|
23 | 23 | import pydantic |
24 | 24 | import regex as re |
25 | 25 | import uvloop |
26 | | -from fastapi import (APIRouter, Depends, FastAPI, Form, HTTPException, Query, |
27 | | - Request) |
| 26 | +from fastapi import APIRouter, Depends, FastAPI, Form, HTTPException, Request |
28 | 27 | from fastapi.exceptions import RequestValidationError |
29 | 28 | from fastapi.middleware.cors import CORSMiddleware |
30 | 29 | from fastapi.responses import JSONResponse, Response, StreamingResponse |
|
99 | 98 | log_non_default_args, with_cancellation) |
100 | 99 | from vllm.logger import init_logger |
101 | 100 | from vllm.reasoning import ReasoningParserManager |
102 | | -from vllm.sampling_params import SamplingParams |
103 | 101 | from vllm.transformers_utils.config import ( |
104 | 102 | maybe_register_config_serialize_by_value) |
105 | 103 | from vllm.transformers_utils.tokenizer import MistralTokenizer |
106 | 104 | from vllm.usage.usage_lib import UsageContext |
107 | 105 | from vllm.utils import (Device, FlexibleArgumentParser, decorate_logs, |
108 | | - is_valid_ipv6_address, random_uuid, set_ulimit) |
| 106 | + get_open_zmq_ipc_path, is_valid_ipv6_address, random_uuid, |
| 107 | + set_ulimit) |
109 | 108 | from vllm.v1.engine.exceptions import EngineDeadError |
110 | 109 | from vllm.v1.metrics.prometheus import get_prometheus_registry |
111 | 110 | from vllm.version import __version__ as VLLM_VERSION |
@@ -344,18 +343,13 @@ def engine_client(request: Request) -> EngineClient: |
344 | 343 |
|
345 | 344 |
|
346 | 345 | @router.get("/health", response_class=Response) |
347 | | -async def health( |
348 | | - raw_request: Request, generate: Optional[bool] = Query(False)) -> Response: |
| 346 | +async def health(raw_request: Request) -> Response: |
349 | 347 | """Health check.""" |
350 | 348 | try: |
351 | 349 | await engine_client(raw_request).check_health() |
352 | | - if generate: |
353 | | - prompt = "Hi" |
354 | | - sampling_params = SamplingParams(temperature=0, max_tokens=2) |
355 | | - request_id = random_uuid() |
356 | | - async for _ in engine_client(raw_request).generate(prompt, sampling_params, |
357 | | - request_id): |
358 | | - pass |
| 350 | + generate_str = raw_request.query_params.get("generate") |
| 351 | + if generate_str == "true": |
| 352 | + await engine_client(raw_request).minimal_generation() |
359 | 353 | return Response(status_code=200) |
360 | 354 | except EngineDeadError: |
361 | 355 | return Response(status_code=503) |
|
0 commit comments