refactor(benchmarks): add type annotations to wait_for_endpoint parameters (vllm-project#25218)

samzong · charlifu · commit e434176fb405 · 2025-09-25T16:25:41.000Z
Signed-off-by: samzong <samzong.lu@gmail.com>
Signed-off-by: charlifu <charlifu@amd.com>
diff --git a/vllm/benchmarks/lib/endpoint_request_func.py b/vllm/benchmarks/lib/endpoint_request_func.py
@@ -8,8 +8,9 @@
 import sys
 import time
 import traceback
+from collections.abc import Awaitable
 from dataclasses import dataclass, field
-from typing import Optional, Union
+from typing import Optional, Protocol, Union
 
 import aiohttp
 from tqdm.asyncio import tqdm
@@ -92,6 +93,16 @@ class RequestFuncOutput:
     start_time: float = 0.0
 
 
+class RequestFunc(Protocol):
+    def __call__(
+        self,
+        request_func_input: RequestFuncInput,
+        session: aiohttp.ClientSession,
+        pbar: Optional[tqdm] = None,
+    ) -> Awaitable[RequestFuncOutput]:
+        ...
+
+
 async def async_request_openai_completions(
     request_func_input: RequestFuncInput,
     session: aiohttp.ClientSession,
@@ -507,7 +518,7 @@ async def async_request_openai_embeddings(
 
 
 # TODO: Add more request functions for different API protocols.
-ASYNC_REQUEST_FUNCS = {
+ASYNC_REQUEST_FUNCS: dict[str, RequestFunc] = {
     "vllm": async_request_openai_completions,
     "openai": async_request_openai_completions,
     "openai-chat": async_request_openai_chat_completions,
diff --git a/vllm/benchmarks/lib/ready_checker.py b/vllm/benchmarks/lib/ready_checker.py
@@ -8,11 +8,12 @@
 import aiohttp
 from tqdm.asyncio import tqdm
 
-from .endpoint_request_func import RequestFuncInput, RequestFuncOutput
+from .endpoint_request_func import (RequestFunc, RequestFuncInput,
+                                    RequestFuncOutput)
 
 
 async def wait_for_endpoint(
-    request_func,
+    request_func: RequestFunc,
     test_input: RequestFuncInput,
     session: aiohttp.ClientSession,
     timeout_seconds: int = 600,