diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 8b3c5ea9de9c0..f9e294af47253 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -4,6 +4,7 @@
 import re
 from contextlib import asynccontextmanager
 from http import HTTPStatus
+from typing import Any, Set
 
 import fastapi
 import uvicorn
@@ -33,6 +34,12 @@
 openai_serving_completion: OpenAIServingCompletion
 logger = init_logger(__name__)
 
+# Strong references to fire-and-forget tasks: the event loop only keeps weak
+# references, so an otherwise unreferenced task may be garbage-collected.
+# The annotation is quoted because asyncio.Task is not subscriptable at
+# runtime before Python 3.9.
+_running_tasks: "Set[asyncio.Task[Any]]" = set()
+
 
 @asynccontextmanager
 async def lifespan(app: fastapi.FastAPI):
@@ -43,7 +50,11 @@ async def _force_log():
             await engine.do_log_stats()
 
     if not engine_args.disable_log_stats:
-        asyncio.create_task(_force_log())
+        task = asyncio.create_task(_force_log())
+        _running_tasks.add(task)
+        # discard() rather than remove(): it never raises if the task has
+        # already been dropped from the set.
+        task.add_done_callback(_running_tasks.discard)
 
     yield
 