diff --git a/tests/lora/test_add_lora.py b/tests/lora/test_add_lora.py index 9a82ab99ea9c..d3d5e2c1cf9f 100644 --- a/tests/lora/test_add_lora.py +++ b/tests/lora/test_add_lora.py @@ -12,7 +12,7 @@ from vllm.inputs import TextPrompt from vllm.lora.request import LoRARequest from vllm.sampling_params import SamplingParams -from vllm.utils.async_utils import merge_async_iterators +from vllm.utils.asyncio import merge_async_iterators MODEL_PATH = "zai-org/chatglm3-6b" LORA_RANK = 64 diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py index af7dad079a9b..7baca7800bb9 100644 --- a/tests/models/multimodal/generation/test_common.py +++ b/tests/models/multimodal/generation/test_common.py @@ -17,7 +17,7 @@ ) from vllm.platforms import current_platform -from vllm.utils.func import identity +from vllm.utils.functools import identity from ....conftest import ( IMAGE_ASSETS, diff --git a/tests/utils_/test_async_utils.py b/tests/utils_/test_async_utils.py index 03d116bdfd81..070cfb38fb03 100644 --- a/tests/utils_/test_async_utils.py +++ b/tests/utils_/test_async_utils.py @@ -5,7 +5,7 @@ import pytest -from vllm.utils.async_utils import merge_async_iterators +from vllm.utils.asyncio import merge_async_iterators async def _mock_async_iterator(idx: int): diff --git a/tests/utils_/test_func_utils.py b/tests/utils_/test_func_utils.py index 147a39699459..e2b5003fd062 100644 --- a/tests/utils_/test_func_utils.py +++ b/tests/utils_/test_func_utils.py @@ -4,7 +4,7 @@ import pytest -from vllm.utils.func import deprecate_kwargs, supports_kw +from vllm.utils.functools import deprecate_kwargs, supports_kw from ..utils import error_on_warning diff --git a/vllm/benchmarks/throughput.py b/vllm/benchmarks/throughput.py index 866365ac18eb..7610b9503480 100644 --- a/vllm/benchmarks/throughput.py +++ b/vllm/benchmarks/throughput.py @@ -34,7 +34,7 @@ from vllm.lora.request import LoRARequest from vllm.outputs import RequestOutput from vllm.sampling_params import BeamSearchParams -from vllm.utils.async_utils import merge_async_iterators +from vllm.utils.asyncio import merge_async_iterators def run_vllm( diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 0d8b0280d504..10472b18d84e 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -51,7 +51,7 @@ from vllm.transformers_utils.processor import cached_get_processor from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer from vllm.utils import random_uuid -from vllm.utils.func import supports_kw +from vllm.utils.functools import supports_kw logger = init_logger(__name__) diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index f33fce7716a9..b60330f71d39 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -35,7 +35,7 @@ from vllm.sampling_params import BeamSearchParams, SamplingParams from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.utils import as_list -from vllm.utils.async_utils import merge_async_iterators +from vllm.utils.asyncio import merge_async_iterators logger = init_logger(__name__) diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py index 4c05d9f57fa6..7540a2e5d5cf 100644 --- a/vllm/entrypoints/openai/serving_embedding.py +++ b/vllm/entrypoints/openai/serving_embedding.py @@ -40,7 +40,7 @@ ) from vllm.pooling_params import PoolingParams from vllm.utils import chunk_list -from vllm.utils.async_utils import merge_async_iterators +from vllm.utils.asyncio import merge_async_iterators logger = init_logger(__name__) diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 6464d4f9e675..041e47c1c797 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -91,7 +91,7 @@ ) from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer from vllm.utils import is_list_of, random_uuid -from vllm.utils.async_utils import ( +from vllm.utils.asyncio import ( AsyncMicrobatchTokenizer, collect_from_async_generator, make_async, diff --git a/vllm/entrypoints/openai/serving_pooling.py b/vllm/entrypoints/openai/serving_pooling.py index 7a27348da35b..102a29fe35cd 100644 --- a/vllm/entrypoints/openai/serving_pooling.py +++ b/vllm/entrypoints/openai/serving_pooling.py @@ -36,7 +36,7 @@ from vllm.logger import init_logger from vllm.outputs import PoolingOutput, PoolingRequestOutput from vllm.tasks import SupportedTask -from vllm.utils.async_utils import merge_async_iterators +from vllm.utils.asyncio import merge_async_iterators logger = init_logger(__name__) diff --git a/vllm/entrypoints/openai/serving_score.py b/vllm/entrypoints/openai/serving_score.py index 9cbfc9791819..de3d7fbb2c5a 100644 --- a/vllm/entrypoints/openai/serving_score.py +++ b/vllm/entrypoints/openai/serving_score.py @@ -37,7 +37,7 @@ from vllm.lora.request import LoRARequest from vllm.outputs import PoolingRequestOutput, ScoringRequestOutput from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer -from vllm.utils.async_utils import make_async, merge_async_iterators +from vllm.utils.asyncio import make_async, merge_async_iterators logger = init_logger(__name__) diff --git a/vllm/entrypoints/renderer.py b/vllm/entrypoints/renderer.py index 63487a6ed007..a8ce5e3fc64d 100644 --- a/vllm/entrypoints/renderer.py +++ b/vllm/entrypoints/renderer.py @@ -17,7 +17,7 @@ from vllm.inputs.data import TokensPrompt as EngineTokensPrompt from vllm.inputs.parse import get_prompt_components, parse_raw_prompts from vllm.transformers_utils.tokenizer import AnyTokenizer -from vllm.utils.async_utils import AsyncMicrobatchTokenizer +from vllm.utils.asyncio import AsyncMicrobatchTokenizer @dataclass(frozen=True) diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py index 9de2249f6c05..cfb7950bcca0 100644 --- a/vllm/executor/executor_base.py +++ b/vllm/executor/executor_base.py @@ -17,7 +17,7 @@ from vllm.lora.request import LoRARequest from vllm.sequence import ExecuteModelRequest from vllm.tasks import SupportedTask -from vllm.utils.async_utils import make_async +from vllm.utils.asyncio import make_async from vllm.v1.outputs import SamplerOutput from vllm.v1.worker.worker_base import WorkerBase diff --git a/vllm/executor/ray_distributed_executor.py b/vllm/executor/ray_distributed_executor.py index b41466a6a770..9d6747be2b33 100644 --- a/vllm/executor/ray_distributed_executor.py +++ b/vllm/executor/ray_distributed_executor.py @@ -24,7 +24,7 @@ get_ip, get_open_port, ) -from vllm.utils.async_utils import make_async +from vllm.utils.asyncio import make_async from vllm.v1.outputs import SamplerOutput if ray is not None: diff --git a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py index 169b14ba46eb..c944140877fa 100644 --- a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py @@ -29,7 +29,7 @@ ) from vllm.utils import has_deep_gemm from vllm.utils.deep_gemm import m_grouped_fp8_gemm_nt_contiguous -from vllm.utils.func import run_once +from vllm.utils.functools import run_once logger = init_logger(__name__) diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py index 2487d7a69113..0eef5ab7dbc0 100644 --- a/vllm/model_executor/models/interfaces.py +++ b/vllm/model_executor/models/interfaces.py @@ -24,7 +24,7 @@ from vllm.inputs.data import PromptType from vllm.logger import init_logger from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.utils.func import supports_kw +from vllm.utils.functools import supports_kw from .interfaces_base import VllmModel, is_pooling_model diff --git a/vllm/model_executor/models/interfaces_base.py b/vllm/model_executor/models/interfaces_base.py index da1ffd254827..0c3c262da41e 100644 --- a/vllm/model_executor/models/interfaces_base.py +++ b/vllm/model_executor/models/interfaces_base.py @@ -15,7 +15,7 @@ from typing_extensions import TypeIs, TypeVar from vllm.logger import init_logger -from vllm.utils.func import supports_kw +from vllm.utils.functools import supports_kw if TYPE_CHECKING: from vllm.config import VllmConfig diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py index 96055551c26e..9e8bb91baed7 100644 --- a/vllm/multimodal/processing.py +++ b/vllm/multimodal/processing.py @@ -26,7 +26,7 @@ from vllm.transformers_utils.processor import cached_processor_from_config from vllm.transformers_utils.tokenizer import AnyTokenizer, decode_tokens, encode_tokens from vllm.utils import flatten_2d_lists, full_groupby -from vllm.utils.func import get_allowed_kwarg_only_overrides +from vllm.utils.functools import get_allowed_kwarg_only_overrides from vllm.utils.jsontree import JSONTree, json_map_leaves from .hasher import MultiModalHasher diff --git a/vllm/tracing.py b/vllm/tracing.py index b4008064fef0..4e55c746c88b 100644 --- a/vllm/tracing.py +++ b/vllm/tracing.py @@ -5,7 +5,7 @@ from collections.abc import Mapping from vllm.logger import init_logger -from vllm.utils.func import run_once +from vllm.utils.functools import run_once TRACE_HEADERS = ["traceparent", "tracestate"] diff --git a/vllm/transformers_utils/processor.py b/vllm/transformers_utils/processor.py index cdc138064a33..7f9d55473357 100644 --- a/vllm/transformers_utils/processor.py +++ b/vllm/transformers_utils/processor.py @@ -16,7 +16,7 @@ from transformers.video_processing_utils import BaseVideoProcessor from typing_extensions import TypeVar -from vllm.utils.func import get_allowed_kwarg_only_overrides +from vllm.utils.functools import get_allowed_kwarg_only_overrides if TYPE_CHECKING: from vllm.config import ModelConfig diff --git a/vllm/utils/async_utils.py b/vllm/utils/asyncio.py similarity index 99% rename from vllm/utils/async_utils.py rename to vllm/utils/asyncio.py index aeabd808add5..b6c24e1ceeee 100644 --- a/vllm/utils/async_utils.py +++ b/vllm/utils/asyncio.py @@ -1,6 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Contains helpers related to asynchronous code.""" +""" +Contains helpers related to asynchronous code. + +This is similar in concept to the `asyncio` module. +""" import asyncio import contextlib diff --git a/vllm/utils/func.py b/vllm/utils/functools.py similarity index 100% rename from vllm/utils/func.py rename to vllm/utils/functools.py diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index ed9d82ca5373..8a52fcef9e73 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -30,8 +30,8 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer, init_tokenizer_from_configs from vllm.usage.usage_lib import UsageContext from vllm.utils import Device, as_list, cdiv -from vllm.utils.async_utils import cancel_task_threadsafe -from vllm.utils.func import deprecate_kwargs +from vllm.utils.asyncio import cancel_task_threadsafe +from vllm.utils.functools import deprecate_kwargs from vllm.v1.engine import EngineCoreRequest from vllm.v1.engine.core_client import EngineCoreClient from vllm.v1.engine.exceptions import EngineDeadError, EngineGenerateError diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py index a9deebc7e1f5..f2e316a90970 100644 --- a/vllm/v1/engine/core_client.py +++ b/vllm/v1/engine/core_client.py @@ -29,7 +29,7 @@ get_open_zmq_inproc_path, make_zmq_socket, ) -from vllm.utils.async_utils import in_loop +from vllm.utils.asyncio import in_loop from vllm.v1.engine import ( EngineCoreOutputs, EngineCoreRequest,