diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py index 45bfad03ec867..3c4aac91549a9 100644 --- a/vllm/engine/metrics.py +++ b/vllm/engine/metrics.py @@ -119,7 +119,7 @@ def __init__(self, labelnames: List[str], max_model_len: int): buckets=[1, 2, 5, 10, 20], ) self.counter_request_success = Counter( - name="vllm:request_success", + name="vllm:request_success_total", documentation="Count of successfully processed requests.", labelnames=labelnames + [Metrics.labelname_finish_reason]) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index af9ba7a3bc825..40103f70a31a3 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -2,6 +2,7 @@ import importlib import inspect import os +import re from contextlib import asynccontextmanager from http import HTTPStatus @@ -12,6 +13,7 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, Response, StreamingResponse from prometheus_client import make_asgi_app +from starlette.routing import Mount import vllm from vllm.engine.arg_utils import AsyncEngineArgs @@ -55,8 +57,10 @@ def parse_args(): # Add prometheus asgi middleware to route /metrics requests -metrics_app = make_asgi_app() -app.mount("/metrics", metrics_app) +route = Mount("/metrics", make_asgi_app()) +# Workaround for 307 Redirect for /metrics +route.path_regex = re.compile('^/metrics(?P.*)$') +app.routes.append(route) @app.exception_handler(RequestValidationError)