vllm-project · DarkLight1337 · Oct 16, 2025 · Oct 15, 2025 · Oct 15, 2025
@@ -12,7 +12,7 @@
 from vllm.inputs import TextPrompt
 from vllm.lora.request import LoRARequest
 from vllm.sampling_params import SamplingParams
-from vllm.utils.async_utils import merge_async_iterators
+from vllm.utils.asyncio import merge_async_iterators
 
 MODEL_PATH = "zai-org/chatglm3-6b"
 LORA_RANK = 64

@@ -17,7 +17,7 @@
 )
 
 from vllm.platforms import current_platform
-from vllm.utils.func import identity
+from vllm.utils.functools import identity
 
 from ....conftest import (
     IMAGE_ASSETS,

diff --git a/tests/utils_/test_async_utils.py b/tests/utils_/test_async_utils.py
@@ -5,7 +5,7 @@
 
 import pytest
 
-from vllm.utils.async_utils import merge_async_iterators
+from vllm.utils.asyncio import merge_async_iterators
 
 
 async def _mock_async_iterator(idx: int):

diff --git a/tests/utils_/test_func_utils.py b/tests/utils_/test_func_utils.py
@@ -4,7 +4,7 @@
 
 import pytest
 
-from vllm.utils.func import deprecate_kwargs, supports_kw
+from vllm.utils.functools import deprecate_kwargs, supports_kw
 
 from ..utils import error_on_warning
 

diff --git a/vllm/benchmarks/throughput.py b/vllm/benchmarks/throughput.py
@@ -34,7 +34,7 @@
 from vllm.lora.request import LoRARequest
 from vllm.outputs import RequestOutput
 from vllm.sampling_params import BeamSearchParams
-from vllm.utils.async_utils import merge_async_iterators
+from vllm.utils.asyncio import merge_async_iterators
 
 
 def run_vllm(

@@ -51,7 +51,7 @@
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
 from vllm.utils import random_uuid
-from vllm.utils.func import supports_kw
+from vllm.utils.functools import supports_kw
 
 logger = init_logger(__name__)
 

@@ -35,7 +35,7 @@
 from vllm.sampling_params import BeamSearchParams, SamplingParams
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.utils import as_list
-from vllm.utils.async_utils import merge_async_iterators
+from vllm.utils.asyncio import merge_async_iterators
 
 logger = init_logger(__name__)
 

@@ -40,7 +40,7 @@
 )
 from vllm.pooling_params import PoolingParams
 from vllm.utils import chunk_list
-from vllm.utils.async_utils import merge_async_iterators
+from vllm.utils.asyncio import merge_async_iterators
 
 logger = init_logger(__name__)
 

@@ -91,7 +91,7 @@
 )
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
 from vllm.utils import is_list_of, random_uuid
-from vllm.utils.async_utils import (
+from vllm.utils.asyncio import (
     AsyncMicrobatchTokenizer,
     collect_from_async_generator,
     make_async,

@@ -36,7 +36,7 @@
 from vllm.logger import init_logger
 from vllm.outputs import PoolingOutput, PoolingRequestOutput
 from vllm.tasks import SupportedTask
-from vllm.utils.async_utils import merge_async_iterators
+from vllm.utils.asyncio import merge_async_iterators
 
 logger = init_logger(__name__)
 

@@ -37,7 +37,7 @@
 from vllm.lora.request import LoRARequest
 from vllm.outputs import PoolingRequestOutput, ScoringRequestOutput
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
-from vllm.utils.async_utils import make_async, merge_async_iterators
+from vllm.utils.asyncio import make_async, merge_async_iterators
 
 logger = init_logger(__name__)
 

@@ -17,7 +17,7 @@
 from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
 from vllm.inputs.parse import get_prompt_components, parse_raw_prompts
 from vllm.transformers_utils.tokenizer import AnyTokenizer
-from vllm.utils.async_utils import AsyncMicrobatchTokenizer
+from vllm.utils.asyncio import AsyncMicrobatchTokenizer
 
 
 @dataclass(frozen=True)

@@ -17,7 +17,7 @@
 from vllm.lora.request import LoRARequest
 from vllm.sequence import ExecuteModelRequest
 from vllm.tasks import SupportedTask
-from vllm.utils.async_utils import make_async
+from vllm.utils.asyncio import make_async
 from vllm.v1.outputs import SamplerOutput
 from vllm.v1.worker.worker_base import WorkerBase
 

diff --git a/vllm/executor/ray_distributed_executor.py b/vllm/executor/ray_distributed_executor.py
@@ -24,7 +24,7 @@
     get_ip,
     get_open_port,
 )
-from vllm.utils.async_utils import make_async
+from vllm.utils.asyncio import make_async
 from vllm.v1.outputs import SamplerOutput
 
 if ray is not None:

@@ -29,7 +29,7 @@
 )
 from vllm.utils import has_deep_gemm
 from vllm.utils.deep_gemm import m_grouped_fp8_gemm_nt_contiguous
-from vllm.utils.func import run_once
+from vllm.utils.functools import run_once
 
 logger = init_logger(__name__)
 

diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py
@@ -24,7 +24,7 @@
 from vllm.inputs.data import PromptType
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import QuantizationConfig
-from vllm.utils.func import supports_kw
+from vllm.utils.functools import supports_kw
 
 from .interfaces_base import VllmModel, is_pooling_model
 

diff --git a/vllm/model_executor/models/interfaces_base.py b/vllm/model_executor/models/interfaces_base.py
@@ -15,7 +15,7 @@
 from typing_extensions import TypeIs, TypeVar
 
 from vllm.logger import init_logger
-from vllm.utils.func import supports_kw
+from vllm.utils.functools import supports_kw
 
 if TYPE_CHECKING:
     from vllm.config import VllmConfig

@@ -26,7 +26,7 @@
 from vllm.transformers_utils.processor import cached_processor_from_config
 from vllm.transformers_utils.tokenizer import AnyTokenizer, decode_tokens, encode_tokens
 from vllm.utils import flatten_2d_lists, full_groupby
-from vllm.utils.func import get_allowed_kwarg_only_overrides
+from vllm.utils.functools import get_allowed_kwarg_only_overrides
 from vllm.utils.jsontree import JSONTree, json_map_leaves
 
 from .hasher import MultiModalHasher

@@ -5,7 +5,7 @@
 from collections.abc import Mapping
 
 from vllm.logger import init_logger
-from vllm.utils.func import run_once
+from vllm.utils.functools import run_once
 
 TRACE_HEADERS = ["traceparent", "tracestate"]
 

diff --git a/vllm/transformers_utils/processor.py b/vllm/transformers_utils/processor.py
@@ -16,7 +16,7 @@
 from transformers.video_processing_utils import BaseVideoProcessor
 from typing_extensions import TypeVar
 
-from vllm.utils.func import get_allowed_kwarg_only_overrides
+from vllm.utils.functools import get_allowed_kwarg_only_overrides
 
 if TYPE_CHECKING:
     from vllm.config import ModelConfig

diff --git a/vllm/utils/async_utils.py → vllm/utils/asyncio.py b/vllm/utils/async_utils.py → vllm/utils/asyncio.py
@@ -1,6 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""Contains helpers related to asynchronous code."""
+"""
+Contains helpers related to asynchronous code.
+
+These helpers are similar in concept to the standard-library `asyncio` module.
+"""
 
 import asyncio
 import contextlib

diff --git a/vllm/utils/func.py → vllm/utils/functools.py b/vllm/utils/func.py → vllm/utils/functools.py
diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py
@@ -30,8 +30,8 @@
 from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs
 from vllm.usage.usage_lib import UsageContext
 from vllm.utils import Device, as_list, cdiv
-from vllm.utils.async_utils import cancel_task_threadsafe
-from vllm.utils.func import deprecate_kwargs
+from vllm.utils.asyncio import cancel_task_threadsafe
+from vllm.utils.functools import deprecate_kwargs
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.engine.core_client import EngineCoreClient
 from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError

diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py
@@ -29,7 +29,7 @@
     get_open_zmq_inproc_path,
     make_zmq_socket,
 )
-from vllm.utils.async_utils import in_loop
+from vllm.utils.asyncio import in_loop
 from vllm.v1.engine import (
     EngineCoreOutputs,
     EngineCoreRequest,