From 98c5f6ee03ffe516ae1b021e2ff5f38642064780 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 18 Apr 2025 12:21:32 +0200 Subject: [PATCH 1/4] Fix test where `limit_mm_per_prompt` cannot be `None` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/engine/test_arg_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/engine/test_arg_utils.py b/tests/engine/test_arg_utils.py index afe6c9229009..7902011519d9 100644 --- a/tests/engine/test_arg_utils.py +++ b/tests/engine/test_arg_utils.py @@ -10,7 +10,7 @@ @pytest.mark.parametrize(("arg", "expected"), [ - (None, None), + (None, dict()), ("image=16", { "image": 16 }), From a61d7881ef6eb4446ab1d34dadd391c69475e8ec Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 18 Apr 2025 12:22:38 +0200 Subject: [PATCH 2/4] Use `dict` for `limit_per_prompt` so it's detected in conversion to argparse Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/config.py | 7 +++---- vllm/engine/arg_utils.py | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index db43d790c531..5b5ac40f6aa2 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -10,7 +10,6 @@ import textwrap import warnings from collections import Counter -from collections.abc import Mapping from contextlib import contextmanager from dataclasses import (MISSING, dataclass, field, fields, is_dataclass, replace) @@ -355,7 +354,7 @@ def __init__( disable_cascade_attn: bool = False, skip_tokenizer_init: bool = False, served_model_name: Optional[Union[str, list[str]]] = None, - limit_mm_per_prompt: Optional[Mapping[str, int]] = None, + limit_mm_per_prompt: Optional[dict[str, int]] = None, use_async_output_proc: bool = True, config_format: ConfigFormat = ConfigFormat.AUTO, hf_token: Optional[Union[bool, str]] = None, @@ -578,7 +577,7 @@ def maybe_pull_model_tokenizer_for_s3(self, model: str, self.tokenizer = s3_tokenizer.dir def _init_multimodal_config( - self, limit_mm_per_prompt: Optional[Mapping[str, int]] + self, limit_mm_per_prompt: Optional[dict[str, int]] ) -> Optional["MultiModalConfig"]: if self.registry.is_multimodal_model(self.architectures): return MultiModalConfig(limit_per_prompt=limit_mm_per_prompt or {}) @@ -2730,7 +2729,7 @@ def verify_with_model_config(self, model_config: ModelConfig): class MultiModalConfig: """Controls the behavior of multimodal models.""" - limit_per_prompt: Mapping[str, int] = field(default_factory=dict) + limit_per_prompt: dict[str, int] = field(default_factory=dict) """ The maximum number of input items allowed per prompt for each modality. This should be a JSON string that will be parsed into a dictionary. diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 223542a5ea11..980bcaf57175 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -7,7 +7,7 @@ import re import threading from dataclasses import MISSING, dataclass, fields -from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Literal, Mapping, +from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Literal, Optional, Tuple, Type, TypeVar, Union, cast, get_args, get_origin) @@ -191,7 +191,7 @@ class EngineArgs: TokenizerPoolConfig.pool_type tokenizer_pool_extra_config: dict[str, Any] = \ get_field(TokenizerPoolConfig, "extra_config") - limit_mm_per_prompt: Mapping[str, int] = \ + limit_mm_per_prompt: dict[str, int] = \ get_field(MultiModalConfig, "limit_per_prompt") mm_processor_kwargs: Optional[Dict[str, Any]] = None disable_mm_preprocessor_cache: bool = False From 2b96a3c2e1e8d1a6091f5aa7024f9f6a23b2e33b Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 18 Apr 2025 12:23:12 +0200 Subject: [PATCH 3/4] Fallback to `nullable_kvs` without error Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/engine/arg_utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 980bcaf57175..1f719392bd9f 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -112,14 +112,14 @@ def nullable_kvs(val: str) -> Optional[dict[str, int]]: def optional_dict(val: str) -> Optional[dict[str, int]]: - try: + if re.match("^{.*}$", val): return optional_arg(val, json.loads) - except ValueError: - logger.warning( - "Failed to parse JSON string. Attempting to parse as " - "comma-separated key=value pairs. This will be deprecated in a " - "future release.") - return nullable_kvs(val) + + logger.warning( + "Failed to parse JSON string. Attempting to parse as " + "comma-separated key=value pairs. This will be deprecated in a " + "future release.") + return nullable_kvs(val) @dataclass From 69ceca479ebd1ca5697aab320393cc9a8631f10a Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 18 Apr 2025 14:38:57 +0200 Subject: [PATCH 4/4] Replace `str()` with `json.dumps()` Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/entrypoints/openai/test_audio.py | 4 +++- tests/entrypoints/openai/test_video.py | 4 +++- tests/entrypoints/openai/test_vision.py | 4 +++- tests/entrypoints/openai/test_vision_embedding.py | 4 +++- tests/models/decoder_only/audio_language/test_ultravox.py | 3 ++- 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/entrypoints/openai/test_audio.py b/tests/entrypoints/openai/test_audio.py index a0a3215d67b5..29d5a85af613 100644 --- a/tests/entrypoints/openai/test_audio.py +++ b/tests/entrypoints/openai/test_audio.py @@ -1,5 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 +import json + import openai import pytest import pytest_asyncio @@ -27,7 +29,7 @@ def server(): "--enforce-eager", "--trust-remote-code", "--limit-mm-per-prompt", - str({"audio": MAXIMUM_AUDIOS}), + json.dumps({"audio": MAXIMUM_AUDIOS}), ] with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: diff --git a/tests/entrypoints/openai/test_video.py b/tests/entrypoints/openai/test_video.py index 263842b94a70..8679c2f25db4 100644 --- a/tests/entrypoints/openai/test_video.py +++ b/tests/entrypoints/openai/test_video.py @@ -1,5 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 +import json + import openai import pytest import pytest_asyncio @@ -31,7 +33,7 @@ def server(): "--enforce-eager", "--trust-remote-code", "--limit-mm-per-prompt", - str({"video": MAXIMUM_VIDEOS}), + json.dumps({"video": MAXIMUM_VIDEOS}), ] with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: diff --git a/tests/entrypoints/openai/test_vision.py b/tests/entrypoints/openai/test_vision.py index 4aeb1700ba97..87b5cee73ecb 100644 --- a/tests/entrypoints/openai/test_vision.py +++ b/tests/entrypoints/openai/test_vision.py @@ -1,5 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 +import json + import openai import pytest import pytest_asyncio @@ -35,7 +37,7 @@ def server(): "--enforce-eager", "--trust-remote-code", "--limit-mm-per-prompt", - str({"image": MAXIMUM_IMAGES}), + json.dumps({"image": MAXIMUM_IMAGES}), ] with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: diff --git a/tests/entrypoints/openai/test_vision_embedding.py b/tests/entrypoints/openai/test_vision_embedding.py index b1b24d8029b4..26c68e06c199 100644 --- a/tests/entrypoints/openai/test_vision_embedding.py +++ b/tests/entrypoints/openai/test_vision_embedding.py @@ -1,5 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 +import json + import pytest import requests from PIL import Image @@ -37,7 +39,7 @@ def server(): "--enforce-eager", "--trust-remote-code", "--limit-mm-per-prompt", - str({"image": MAXIMUM_IMAGES}), + json.dumps({"image": MAXIMUM_IMAGES}), "--chat-template", str(vlm2vec_jinja_path), ] diff --git a/tests/models/decoder_only/audio_language/test_ultravox.py b/tests/models/decoder_only/audio_language/test_ultravox.py index 3d058d1bca5b..bd1dcba6a995 100644 --- a/tests/models/decoder_only/audio_language/test_ultravox.py +++ b/tests/models/decoder_only/audio_language/test_ultravox.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 +import json from typing import Optional import numpy as np @@ -50,7 +51,7 @@ def server(request, audio_assets): args = [ "--dtype", "bfloat16", "--max-model-len", "4096", "--enforce-eager", "--limit-mm-per-prompt", - str({"audio": len(audio_assets)}), "--trust-remote-code" + json.dumps({"audio": len(audio_assets)}), "--trust-remote-code" ] + [ f"--{key.replace('_','-')}={value}" for key, value in request.param.items()