From acb78b22bdd8fd3784448deb4995639052388067 Mon Sep 17 00:00:00 2001
From: Xingyu Liu
Date: Wed, 3 Sep 2025 23:14:17 -0700
Subject: [PATCH 01/12] introduce config parser

Signed-off-by: Xingyu Liu
---
 vllm/transformers_utils/config.py             | 167 +++++++++++++++++-
 vllm/transformers_utils/config_parser_base.py |  20 +++
 2 files changed, 182 insertions(+), 5 deletions(-)
 create mode 100644 vllm/transformers_utils/config_parser_base.py

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index bec792465bfb..2643314f1b7d 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -27,6 +27,7 @@
 
 from vllm import envs
 from vllm.logger import init_logger
+from vllm.transformers_utils.config_parser_base import ConfigParserBase
 from vllm.transformers_utils.utils import check_gguf_file
 
 if envs.VLLM_USE_MODELSCOPE:
@@ -105,6 +106,149 @@ class ConfigFormat(str, enum.Enum):
     MISTRAL = "mistral"
 
 
+class HFConfigParser(ConfigParserBase):
+
+    def parse(self,
+              model: Union[str, Path],
+              trust_remote_code: bool,
+              revision: Optional[str] = None,
+              code_revision: Optional[str] = None,
+              **kwargs) -> tuple[dict, PretrainedConfig]:
+        kwargs["local_files_only"] = huggingface_hub.constants.HF_HUB_OFFLINE
+        config_dict, _ = PretrainedConfig.get_config_dict(
+            model,
+            revision=revision,
+            code_revision=code_revision,
+            token=_get_hf_token(),
+            **kwargs,
+        )
+        # Use custom model class if it's in our registry
+        model_type = config_dict.get("model_type")
+        if model_type is None:
+            model_type = "speculators" if config_dict.get(
+                "speculators_config") is not None else model_type
+
+        if model_type in _CONFIG_REGISTRY:
+            config_class = _CONFIG_REGISTRY[model_type]
+            config = config_class.from_pretrained(
+                model,
+                revision=revision,
+                code_revision=code_revision,
+                token=_get_hf_token(),
+                **kwargs,
+            )
+        else:
+            try:
+                kwargs = _maybe_update_auto_config_kwargs(
+                    kwargs, model_type=model_type)
+                config = AutoConfig.from_pretrained(
+                    model,
+                    trust_remote_code=trust_remote_code,
+                    revision=revision,
+                    code_revision=code_revision,
+                    token=_get_hf_token(),
+                    **kwargs,
+                )
+            except ValueError as e:
+                if (not trust_remote_code
+                        and "requires you to execute the configuration file"
+                        in str(e)):
+                    err_msg = (
+                        "Failed to load the model config. If the model "
+                        "is a custom model not yet available in the "
+                        "HuggingFace transformers library, consider setting "
+                        "`trust_remote_code=True` in LLM or using the "
+                        "`--trust-remote-code` flag in the CLI.")
+                    raise RuntimeError(err_msg) from e
+                else:
+                    raise e
+        config = _maybe_remap_hf_config_attrs(config)
+        return config_dict, config
+
+
+class MistralConfigParser(ConfigParserBase):
+
+    def parse(self,
+              model: Union[str, Path],
+              trust_remote_code: bool,
+              revision: Optional[str] = None,
+              code_revision: Optional[str] = None,
+              **kwargs) -> tuple[dict, PretrainedConfig]:
+        # This function loads a params.json config which
+        # should be used when loading models in mistral format
+        config_dict = _download_mistral_config_file(model, revision)
+        if (max_position_embeddings :=
+                config_dict.get("max_position_embeddings")) is None:
+            max_position_embeddings = _maybe_retrieve_max_pos_from_hf(
+                model, revision, **kwargs)
+            config_dict["max_position_embeddings"] = max_position_embeddings
+
+        from vllm.transformers_utils.configs.mistral import adapt_config_dict
+
+        config = adapt_config_dict(config_dict)
+
+        # Mistral configs may define sliding_window as list[int]. Convert it
+        # to int and add the layer_types list[str] to make it HF compatible
+        if ((sliding_window := getattr(config, "sliding_window", None))
+                and isinstance(sliding_window, list)):
+            pattern_repeats = config.num_hidden_layers // len(sliding_window)
+            layer_types = sliding_window * pattern_repeats
+            config.layer_types = [
+                "full_attention" if layer_type is None else "sliding_attention"
+                for layer_type in layer_types
+            ]
+            config.sliding_window = next(filter(None, sliding_window), None)
+
+        return config_dict, config
+
+
+_CONFIG_FORMAT_TO_CONFIG_PARSER: dict[str, type[ConfigParserBase]] = {
+    "hf": HFConfigParser,
+    "mistral": MistralConfigParser,
+}
+
+
+def register_config_parser(config_format: str):
+    """Register a customized vllm model loader.
+    When a load format is not supported by vllm, you can register a customized
+    model loader to support it.
+    Args:
+        load_format (str): The model loader format name.
+    Examples:
+        >>> from vllm.config import LoadConfig
+        >>> from vllm.model_executor.model_loader import get_model_loader, register_model_loader
+        >>> from vllm.model_executor.model_loader.base_loader import BaseModelLoader
+        >>>
+        >>> @register_model_loader("my_loader")
+        ... class MyModelLoader(BaseModelLoader):
+        ...     def download_model(self):
+        ...         pass
+        ...
+        ...     def load_weights(self):
+        ...         pass
+        >>>
+        >>> load_config = LoadConfig(load_format="my_loader")
+        >>> type(get_model_loader(load_config))
+
+    """ # noqa: E501
+
+    def _wrapper(config_parser_cls):
+        if config_format in _CONFIG_FORMAT_TO_CONFIG_PARSER:
+            logger.warning(
+                "Load format `%s` is already registered, and will be "
+                "overwritten by the new loader class `%s`.", config_format,
+                config_parser_cls)
+        if not issubclass(config_parser_cls, ConfigParserBase):
+            raise ValueError("The model loader must be a subclass of "
+                             "`BaseModelLoader`.")
+        _CONFIG_FORMAT_TO_CONFIG_PARSER[config_format] = config_parser_cls
+        logger.info("Registered model loader `%s` with load format `%s`",
+                    config_parser_cls, config_format)
+        return config_parser_cls
+
+    return _wrapper
+
+
 _R = TypeVar("_R")
 
 
@@ -349,7 +493,7 @@ def get_config(
     trust_remote_code: bool,
     revision: Optional[str] = None,
     code_revision: Optional[str] = None,
-    config_format: ConfigFormat = ConfigFormat.AUTO,
+    config_format: Union[ConfigFormat, str] = ConfigFormat.AUTO,
     hf_overrides_kw: Optional[dict[str, Any]] = None,
     hf_overrides_fn: Optional[Callable[[PretrainedConfig],
                                        PretrainedConfig]] = None,
@@ -361,16 +505,16 @@ def get_config(
     if is_gguf:
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
-
+    print("lxy here")
     if config_format == ConfigFormat.AUTO:
         try:
             if is_gguf or file_or_path_exists(
                     model, HF_CONFIG_NAME, revision=revision):
-                config_format = ConfigFormat.HF
+                config_format_str = ConfigFormat.HF.value
             elif file_or_path_exists(model,
                                      MISTRAL_CONFIG_NAME,
                                      revision=revision):
-                config_format = ConfigFormat.MISTRAL
+                config_format_str = ConfigFormat.MISTRAL.value
             else:
                 raise ValueError(
                     "Could not detect config format for no config file found. "
                     "Ensure your model has either config.json (HF format) "
                     "or params.json (Mistral format).")
 
         except Exception as e:
             error_message = (
@@ -393,7 +537,20 @@ def get_config(
                 "supported.\n").format(model=model)
             raise ValueError(error_message) from e
 
+    else:
+        # out-of-tree plugin must use string config_format
+        config_format_str = config_format.value if isinstance(
+            config_format, ConfigFormat) else config_format
+
+    config_parser = _CONFIG_FORMAT_TO_CONFIG_PARSER[config_format_str]()
+    config_dict, config = config_parser.parse(
+        model,
+        trust_remote_code=trust_remote_code,
+        revision=revision,
+        code_revision=code_revision,
+        **kwargs,
+    )
+    """
     if config_format == ConfigFormat.HF:
         kwargs["local_files_only"] = huggingface_hub.constants.HF_HUB_OFFLINE
         config_dict, _ = PretrainedConfig.get_config_dict(
             model,
             revision=revision,
             code_revision=code_revision,
             token=_get_hf_token(),
             **kwargs,
         )
         # Use custom model class if it's in our registry
         model_type = config_dict.get("model_type")
         if model_type is None:
             model_type = "speculators" if config_dict.get(
                 "speculators_config") is not None else model_type
 
         if model_type in _CONFIG_REGISTRY:
             config_class = _CONFIG_REGISTRY[model_type]
             config = config_class.from_pretrained(
                 model,
                 revision=revision,
                 code_revision=code_revision,
                 token=_get_hf_token(),
                 **kwargs,
             )
         else:
             try:
                 kwargs = _maybe_update_auto_config_kwargs(
                     kwargs, model_type=model_type)
                 config = AutoConfig.from_pretrained(
                     model,
                     trust_remote_code=trust_remote_code,
                     revision=revision,
                     code_revision=code_revision,
                     token=_get_hf_token(),
                     **kwargs,
                 )
             except ValueError as e:
                 if (not trust_remote_code
                         and "requires you to execute the configuration file"
                         in str(e)):
                     err_msg = (
                         "Failed to load the model config. If the model "
                         "is a custom model not yet available in the "
                         "HuggingFace transformers library, consider setting "
                         "`trust_remote_code=True` in LLM or using the "
                         "`--trust-remote-code` flag in the CLI.")
                     raise RuntimeError(err_msg) from e
                 else:
                     raise e
         config = _maybe_remap_hf_config_attrs(config)
 
     elif config_format == ConfigFormat.MISTRAL:
         # This function loads a params.json config which
         # should be used when loading models in mistral format
         config_dict = _download_mistral_config_file(model, revision)
         if (max_position_embeddings :=
                 config_dict.get("max_position_embeddings")) is None:
             max_position_embeddings = _maybe_retrieve_max_pos_from_hf(
                 model, revision, **kwargs)
             config_dict["max_position_embeddings"] = max_position_embeddings
 
         from vllm.transformers_utils.configs.mistral import adapt_config_dict
 
         config = adapt_config_dict(config_dict)
 
         # Mistral configs may define sliding_window as list[int]. Convert it
         # to int and add the layer_types list[str] to make it HF compatible
         if ((sliding_window := getattr(config, "sliding_window", None))
                 and isinstance(sliding_window, list)):
             pattern_repeats = config.num_hidden_layers // len(sliding_window)
             layer_types = sliding_window * pattern_repeats
             config.layer_types = [
                 "full_attention" if layer_type is None else "sliding_attention"
                 for layer_type in layer_types
             ]
             config.sliding_window = next(filter(None, sliding_window), None)
     else:
         supported_formats = [
             fmt.value for fmt in ConfigFormat if fmt != ConfigFormat.AUTO
         ]
         raise ValueError(
             f"Unsupported config format: {config_format}. "
             f"Supported formats are: {', '.join(supported_formats)}. "
             f"Ensure your model uses one of these configuration formats "
             f"or specify the correct format explicitly.")
-
+    """
     # Special architecture mapping check for GGUF models
     if is_gguf:
         if config.model_type not in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:

diff --git a/vllm/transformers_utils/config_parser_base.py b/vllm/transformers_utils/config_parser_base.py
new file mode 100644
index 000000000000..c27177f74d4b
--- /dev/null
+++ b/vllm/transformers_utils/config_parser_base.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import ABC, abstractmethod
+from pathlib import Path
+from typing import Optional, Union
+
+from transformers import PretrainedConfig
+
+
+class ConfigParserBase(ABC):
+
+    @abstractmethod
+    def parse(self,
+              model: Union[str, Path],
+              trust_remote_code: bool,
+              revision: Optional[str] = None,
+              code_revision: Optional[str] = None,
+              **kwargs) -> tuple[dict, PretrainedConfig]:
+        raise NotImplementedError

From 67f20b22fc6d81f774c32ab60980ea9beb255098 Mon Sep 17 00:00:00 2001
From: Xingyu Liu
Date: Wed, 3 Sep 2025 23:44:24 -0700
Subject: [PATCH 02/12] add tests

Signed-off-by: Xingyu Liu
---
 .../test_config_parser_registry.py            | 37 +++++++++++++++++++
 vllm/config/__init__.py                       |  3 ---
 vllm/transformers_utils/config.py             |  9 ++++-
 3 files changed, 45 insertions(+), 4 deletions(-)
 create mode 100644 tests/transformers_utils/test_config_parser_registry.py

diff --git a/tests/transformers_utils/test_config_parser_registry.py b/tests/transformers_utils/test_config_parser_registry.py
new file mode 100644
index 000000000000..a65ed798f77a
--- /dev/null
+++ b/tests/transformers_utils/test_config_parser_registry.py
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from pathlib import Path
+from typing import Optional, Union
+
+import pytest
+from transformers import PretrainedConfig
+
+from vllm.transformers_utils.config import (get_config_parser,
+                                            register_config_parser)
+from vllm.transformers_utils.config_parser_base import ConfigParserBase
+
+
+@register_config_parser("custom_config_parser")
+class CustomConfigParser(ConfigParserBase):
+
+    def parse(self,
+              model: Union[str, Path],
+              trust_remote_code: bool,
+              revision: Optional[str] = None,
+              code_revision: Optional[str] = None,
+              **kwargs) -> tuple[dict, PretrainedConfig]:
+        raise NotImplementedError
+
+
+def test_register_model_loader():
+    assert isinstance(get_config_parser("custom_config_parser"),
+                      CustomConfigParser)
+
+
+def test_invalid_model_loader():
+    with pytest.raises(ValueError):
+
+        @register_config_parser("invalid_config_parser")
+        class InValidModelLoader:
+            pass

diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index fd3ad2c8a6d6..49ef73c320a5 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -627,9 +627,6 @@ def __post_init__(self) -> None:
             raise ValueError(
                 "Sleep mode is not supported on current platform.")
 
-        if isinstance(self.config_format, str):
-            self.config_format = ConfigFormat(self.config_format)
-
         hf_config = get_config(self.hf_config_path or self.model,
                                self.trust_remote_code,
                                self.revision,

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 2643314f1b7d..55dcd4eec540 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -208,6 +208,13 @@ def parse(self,
 }
 
 
+def get_config_parser(config_format: str) -> ConfigParserBase:
+    """Get the config parser for a given config format."""
+    if config_format not in _CONFIG_FORMAT_TO_CONFIG_PARSER:
+        raise ValueError(f"Unknown load format `{config_format}`.")
+    return _CONFIG_FORMAT_TO_CONFIG_PARSER[config_format]()
+
+
 def register_config_parser(config_format: str):
     """Register a customized vllm model loader.
     When a load format is not supported by vllm, you can register a customized
@@ -542,7 +549,7 @@ def get_config(
             config_format_str = config_format.value if isinstance(
                 config_format, ConfigFormat) else config_format
 
-    config_parser = _CONFIG_FORMAT_TO_CONFIG_PARSER[config_format_str]()
+    config_parser = get_config_parser(config_format_str)
     config_dict, config = config_parser.parse(
         model,
         trust_remote_code=trust_remote_code,

From 6904978e585893aed447424f827234e1d30d419f Mon Sep 17 00:00:00 2001
From: Xingyu Liu
Date: Thu, 4 Sep 2025 00:30:29 -0700
Subject: [PATCH 03/12] clean comments

Signed-off-by: Xingyu Liu
---
 vllm/transformers_utils/config.py | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 55dcd4eec540..e288bb0e6ee0 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -216,28 +216,6 @@ def get_config_parser(config_format: str) -> ConfigParserBase:
 
 
 def register_config_parser(config_format: str):
-    """Register a customized vllm model loader.
-    When a load format is not supported by vllm, you can register a customized
-    model loader to support it.
-    Args:
-        load_format (str): The model loader format name.
-    Examples:
-        >>> from vllm.config import LoadConfig
-        >>> from vllm.model_executor.model_loader import get_model_loader, register_model_loader
-        >>> from vllm.model_executor.model_loader.base_loader import BaseModelLoader
-        >>>
-        >>> @register_model_loader("my_loader")
-        ... class MyModelLoader(BaseModelLoader):
-        ...     def download_model(self):
-        ...         pass
-        ...
-        ...     def load_weights(self):
-        ...         pass
-        >>>
-        >>> load_config = LoadConfig(load_format="my_loader")
-        >>> type(get_model_loader(load_config))
-
-    """ # noqa: E501
 
     def _wrapper(config_parser_cls):
         if config_format in _CONFIG_FORMAT_TO_CONFIG_PARSER:

From b4a6715cbd99fc54678673f0098ee64d6a508683 Mon Sep 17 00:00:00 2001
From: Xingyu Liu
Date: Thu, 4 Sep 2025 13:26:11 -0700
Subject: [PATCH 04/12] add docstring

Signed-off-by: Xingyu Liu
---
 vllm/transformers_utils/config.py | 121 ++++++++----------------
 1 file changed, 30 insertions(+), 91 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index e288bb0e6ee0..11e0de6d7bd1 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -217,6 +217,31 @@ def get_config_parser(config_format: str) -> ConfigParserBase:
 
 
 def register_config_parser(config_format: str):
+    """Register a customized vllm config parser.
+    When a config format is not supported by vllm, you can register a customized
+    config parser to support it.
+    Args:
+        config_format (str): The config parser format name.
+    Examples:
+
+        >>> from vllm.transformers_utils.config import (get_config_parser,
+                                                        register_config_parser)
+        >>> from vllm.transformers_utils.config_parser_base import ConfigParserBase
+        >>>
+        >>> @register_config_parser("custom_config_parser")
+        ... class CustomConfigParser(ConfigParserBase):
+        ...     def parse(self,
+        ...               model: Union[str, Path],
+        ...               trust_remote_code: bool,
+        ...               revision: Optional[str] = None,
+        ...               code_revision: Optional[str] = None,
+        ...               **kwargs) -> tuple[dict, PretrainedConfig]:
+        ...         raise NotImplementedError
+        >>>
+        >>> type(get_config_parser("custom_config_parser"))
+
+    """ # noqa: E501
+
     def _wrapper(config_parser_cls):
         if config_format in _CONFIG_FORMAT_TO_CONFIG_PARSER:
             logger.warning(
@@ -490,7 +515,7 @@ def get_config(
     if is_gguf:
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
-    print("lxy here")
+
     if config_format == ConfigFormat.AUTO:
         try:
             if is_gguf or file_or_path_exists(
@@ -503,8 +528,10 @@ def get_config(
             else:
                 raise ValueError(
                     "Could not detect config format for no config file found. "
-                    "Ensure your model has either config.json (HF format) "
-                    "or params.json (Mistral format).")
+                    "With ConfigFormat.AUTO, ensure your model has either"
+                    "config.json (HF format) or params.json (Mistral format)."
+                    "Otherwise please specify your_custom_config_format"
+                    "in engine args for customized config parser")
 
         except Exception as e:
             error_message = (
@@ -523,7 +550,6 @@ def get_config(
             raise ValueError(error_message) from e
     else:
-        # out-of-tree plugin must use string config_format
         config_format_str = config_format.value if isinstance(
             config_format, ConfigFormat) else config_format
 
@@ -535,93 +561,6 @@ def get_config(
     config_parser = get_config_parser(config_format_str)
     config_dict, config = config_parser.parse(
         model,
         trust_remote_code=trust_remote_code,
         revision=revision,
         code_revision=code_revision,
         **kwargs,
     )
-    """
-    if config_format == ConfigFormat.HF:
-        kwargs["local_files_only"] = huggingface_hub.constants.HF_HUB_OFFLINE
-        config_dict, _ = PretrainedConfig.get_config_dict(
-            model,
-            revision=revision,
-            code_revision=code_revision,
-            token=_get_hf_token(),
-            **kwargs,
-        )
-        # Use custom model class if it's in our registry
-        model_type = config_dict.get("model_type")
-        if model_type is None:
-            model_type = "speculators" if config_dict.get(
-                "speculators_config") is not None else model_type
-
-        if model_type in _CONFIG_REGISTRY:
-            config_class = _CONFIG_REGISTRY[model_type]
-            config = config_class.from_pretrained(
-                model,
-                revision=revision,
-                code_revision=code_revision,
-                token=_get_hf_token(),
-                **kwargs,
-            )
-        else:
-            try:
-                kwargs = _maybe_update_auto_config_kwargs(
-                    kwargs, model_type=model_type)
-                config = AutoConfig.from_pretrained(
-                    model,
-                    trust_remote_code=trust_remote_code,
-                    revision=revision,
-                    code_revision=code_revision,
-                    token=_get_hf_token(),
-                    **kwargs,
-                )
-            except ValueError as e:
-                if (not trust_remote_code
-                        and "requires you to execute the configuration file"
-                        in str(e)):
-                    err_msg = (
-                        "Failed to load the model config. If the model "
-                        "is a custom model not yet available in the "
-                        "HuggingFace transformers library, consider setting "
-                        "`trust_remote_code=True` in LLM or using the "
-                        "`--trust-remote-code` flag in the CLI.")
-                    raise RuntimeError(err_msg) from e
-                else:
-                    raise e
-        config = _maybe_remap_hf_config_attrs(config)
-
-    elif config_format == ConfigFormat.MISTRAL:
-        # This function loads a params.json config which
-        # should be used when loading models in mistral format
-        config_dict = _download_mistral_config_file(model, revision)
-        if (max_position_embeddings :=
-                config_dict.get("max_position_embeddings")) is None:
-            max_position_embeddings = _maybe_retrieve_max_pos_from_hf(
-                model, revision, **kwargs)
-            config_dict["max_position_embeddings"] = max_position_embeddings
-
-        from vllm.transformers_utils.configs.mistral import adapt_config_dict
-
-        config = adapt_config_dict(config_dict)
-
-        # Mistral configs may define sliding_window as list[int]. Convert it
-        # to int and add the layer_types list[str] to make it HF compatible
-        if ((sliding_window := getattr(config, "sliding_window", None))
-                and isinstance(sliding_window, list)):
-            pattern_repeats = config.num_hidden_layers // len(sliding_window)
-            layer_types = sliding_window * pattern_repeats
-            config.layer_types = [
-                "full_attention" if layer_type is None else "sliding_attention"
-                for layer_type in layer_types
-            ]
-            config.sliding_window = next(filter(None, sliding_window), None)
-    else:
-        supported_formats = [
-            fmt.value for fmt in ConfigFormat if fmt != ConfigFormat.AUTO
-        ]
-        raise ValueError(
-            f"Unsupported config format: {config_format}. "
-            f"Supported formats are: {', '.join(supported_formats)}. "
-            f"Ensure your model uses one of these configuration formats "
-            f"or specify the correct format explicitly.")
-    """
     # Special architecture mapping check for GGUF models
     if is_gguf:
         if config.model_type not in MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:

From 60d06d6c4bf321d007ea540077478ae88b867aaf Mon Sep 17 00:00:00 2001
From: Xingyu Liu
Date: Thu, 4 Sep 2025 13:31:16 -0700
Subject: [PATCH 05/12] add __init__

Signed-off-by: Xingyu Liu
---
 tests/transformers_utils/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/transformers_utils/__init__.py

diff --git a/tests/transformers_utils/__init__.py b/tests/transformers_utils/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1

From 1f651dec49e21024fb3e7308dcc92bba26edc97b Mon Sep 17 00:00:00 2001
From: Xingyu Liu <38244988+charlotte12l@users.noreply.github.com>
Date: Thu, 4 Sep 2025 13:43:14 -0700
Subject: [PATCH 06/12] change value error string

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: Xingyu Liu <38244988+charlotte12l@users.noreply.github.com>
---
 vllm/transformers_utils/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 11e0de6d7bd1..c6e3ae962c60 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -211,7 +211,7 @@ def get_config_parser(config_format: str) -> ConfigParserBase:
     """Get the config parser for a given config format."""
     if config_format not in _CONFIG_FORMAT_TO_CONFIG_PARSER:
-        raise ValueError(f"Unknown load format `{config_format}`.")
+        raise ValueError(f"Unknown config format `{config_format}`.")
     return _CONFIG_FORMAT_TO_CONFIG_PARSER[config_format]()

From afee616d0bda1dbaa595393ac416dbba7151c4f8 Mon Sep 17 00:00:00 2001
From: Xingyu Liu <38244988+charlotte12l@users.noreply.github.com>
Date: Thu, 4 Sep 2025 13:43:47 -0700
Subject: [PATCH 07/12] Update tests name

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: Xingyu Liu <38244988+charlotte12l@users.noreply.github.com>
---
 tests/transformers_utils/test_config_parser_registry.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/transformers_utils/test_config_parser_registry.py b/tests/transformers_utils/test_config_parser_registry.py
index a65ed798f77a..13c654e05d2a 100644
--- a/tests/transformers_utils/test_config_parser_registry.py
+++ b/tests/transformers_utils/test_config_parser_registry.py
@@ -24,14 +24,14 @@ def parse(self,
         raise NotImplementedError
 
 
-def test_register_model_loader():
+def test_register_config_parser():
     assert isinstance(get_config_parser("custom_config_parser"),
                       CustomConfigParser)
 
 
-def test_invalid_model_loader():
+def test_invalid_config_parser():
     with pytest.raises(ValueError):
 
         @register_config_parser("invalid_config_parser")
-        class InValidModelLoader:
+        class InvalidConfigParser:
             pass

From 75e613e222aae85ad7a08e9e0cb306918179b792 Mon Sep 17 00:00:00 2001
From: Xingyu Liu <38244988+charlotte12l@users.noreply.github.com>
Date: Thu, 4 Sep 2025 13:44:00 -0700
Subject: [PATCH 08/12] update string

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Signed-off-by: Xingyu Liu <38244988+charlotte12l@users.noreply.github.com>
---
 vllm/transformers_utils/config.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index c6e3ae962c60..0c531bb42437 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -245,14 +245,14 @@ def register_config_parser(config_format: str):
     def _wrapper(config_parser_cls):
         if config_format in _CONFIG_FORMAT_TO_CONFIG_PARSER:
             logger.warning(
-                "Load format `%s` is already registered, and will be "
-                "overwritten by the new loader class `%s`.", config_format,
+                "Config format `%s` is already registered, and will be "
+                "overwritten by the new parser class `%s`.", config_format,
                 config_parser_cls)
         if not issubclass(config_parser_cls, ConfigParserBase):
-            raise ValueError("The model loader must be a subclass of "
-                             "`BaseModelLoader`.")
+            raise ValueError("The config parser must be a subclass of "
+                             "`ConfigParserBase`.")
         _CONFIG_FORMAT_TO_CONFIG_PARSER[config_format] = config_parser_cls
-        logger.info("Registered model loader `%s` with load format `%s`",
+        logger.info("Registered config parser `%s` with config format `%s`",
                     config_parser_cls, config_format)
         return config_parser_cls

From ad3edfb60d3a82e2b7ddb07e6d2c3026577a1a48 Mon Sep 17 00:00:00 2001
From: Xingyu Liu
Date: Sun, 7 Sep 2025 16:52:54 -0700
Subject: [PATCH 09/12] make ConfigFormat Literal

Signed-off-by: Xingyu Liu
---
 vllm/config/__init__.py           |  2 +-
 vllm/engine/arg_utils.py          |  7 +++----
 vllm/transformers_utils/config.py | 32 ++++++++++++++-----------------
 3 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index 108baf9276f9..5f0965c049df 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -420,7 +420,7 @@ class ModelConfig:
     `--media-io-kwargs '{"video": {"num_frames": 40} }'`
     """
     use_async_output_proc: bool = True
     """Whether to use async output processor."""
-    config_format: Union[str, ConfigFormat] = ConfigFormat.AUTO.value
+    config_format: Union[str, ConfigFormat] = "auto"
     """The format of the model config to load:\n
     - "auto" will try to load the config in hf format if available else it
       will try to load in mistral format.\n

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 71ee90040f37..e1fd65d08b4e 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -22,9 +22,9 @@
 
 import vllm.envs as envs
 from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
-                         ConfigFormat, ConfigType, ConvertOption,
-                         DecodingConfig, DetailedTraceModules, Device,
-                         DeviceConfig, DistributedExecutorBackend, EPLBConfig,
+                         ConfigType, ConvertOption, DecodingConfig,
+                         DetailedTraceModules, Device, DeviceConfig,
+                         DistributedExecutorBackend, EPLBConfig,
                          GuidedDecodingBackend, HfOverrides, KVEventsConfig,
                          KVTransferConfig, LoadConfig, LogprobsMode,
                          LoRAConfig, MambaDType, MMEncoderTPMode, ModelConfig,
@@ -547,7 +547,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         help="Disable async output processing. This may result in "
         "lower performance.")
         model_group.add_argument("--config-format",
-                                 choices=[f.value for f in ConfigFormat],
                                  **model_kwargs["config_format"])
         # This one is a special case because it can bool
         # or str. TODO: Handle this in get_kwargs

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 05c0fcb77784..d6ebcdf80525 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -1,13 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
-import enum
 import json
 import os
 import time
 from functools import cache, partial
 from pathlib import Path
-from typing import Any, Callable, Optional, TypeVar, Union
+from typing import Any, Callable, Literal, Optional, TypeVar, Union
 
 import huggingface_hub
 from huggingface_hub import get_safetensors_metadata, hf_hub_download
@@ -101,12 +100,6 @@ def __getitem__(self, key):
 }
 
 
-class ConfigFormat(str, enum.Enum):
-    AUTO = "auto"
-    HF = "hf"
-    MISTRAL = "mistral"
-
-
 class HFConfigParser(ConfigParserBase):
 
     def parse(self,
@@ -208,6 +201,12 @@ def parse(self,
     "mistral": MistralConfigParser,
 }
 
+ConfigFormat = Literal[
+    "auto",
+    "hf",
+    "mistral",
+]
+
 
 def get_config_parser(config_format: str) -> ConfigParserBase:
     """Get the config parser for a given config format."""
@@ -504,7 +503,7 @@ def get_config(
     trust_remote_code: bool,
     revision: Optional[str] = None,
     code_revision: Optional[str] = None,
-    config_format: Union[ConfigFormat, str] = ConfigFormat.AUTO,
+    config_format: Union[str, ConfigFormat] = "auto",
     hf_overrides_kw: Optional[dict[str, Any]] = None,
     hf_overrides_fn: Optional[Callable[[PretrainedConfig],
                                        PretrainedConfig]] = None,
@@ -517,19 +516,19 @@ def get_config(
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
 
-    if config_format == ConfigFormat.AUTO:
+    if config_format == "auto":
         try:
             if is_gguf or file_or_path_exists(
                     model, HF_CONFIG_NAME, revision=revision):
-                config_format_str = ConfigFormat.HF.value
+                config_format = "hf"
             elif file_or_path_exists(model,
                                      MISTRAL_CONFIG_NAME,
                                      revision=revision):
-                config_format_str = ConfigFormat.MISTRAL.value
+                config_format = "mistral"
             else:
                 raise ValueError(
                     "Could not detect config format for no config file found. "
-                    "With ConfigFormat.AUTO, ensure your model has either"
+                    "With config_format 'auto', ensure your model has either"
                     "config.json (HF format) or params.json (Mistral format)."
"Otherwise please specify your_custom_config_format" "in engine args for customized config parser") @@ -550,11 +549,8 @@ def get_config( "supported.\n").format(model=model) raise ValueError(error_message) from e - else: - config_format_str = config_format.value if isinstance( - config_format, ConfigFormat) else config_format - config_parser = get_config_parser(config_format_str) + config_parser = get_config_parser(config_format) config_dict, config = config_parser.parse( model, trust_remote_code=trust_remote_code, @@ -995,7 +991,7 @@ def _maybe_retrieve_max_pos_from_hf(model, revision, **kwargs) -> int: hf_config = get_config(model=model, trust_remote_code=trust_remote_code_val, revision=revision, - config_format=ConfigFormat.HF) + config_format="hf") if hf_value := hf_config.get_text_config().max_position_embeddings: max_position_embeddings = hf_value except Exception as e: From d4b2a1ad8ebb6e2a9a0390e50d6e1d17d4685d13 Mon Sep 17 00:00:00 2001 From: Xingyu Liu Date: Sun, 7 Sep 2025 17:28:20 -0700 Subject: [PATCH 10/12] add metavar Signed-off-by: Xingyu Liu --- vllm/engine/arg_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index e1fd65d08b4e..b394144dfcdf 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -22,9 +22,9 @@ import vllm.envs as envs from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig, - ConfigType, ConvertOption, DecodingConfig, - DetailedTraceModules, Device, DeviceConfig, - DistributedExecutorBackend, EPLBConfig, + ConfigFormat, ConfigType, ConvertOption, + DecodingConfig, DetailedTraceModules, Device, + DeviceConfig, DistributedExecutorBackend, EPLBConfig, GuidedDecodingBackend, HfOverrides, KVEventsConfig, KVTransferConfig, LoadConfig, LogprobsMode, LoRAConfig, MambaDType, MMEncoderTPMode, ModelConfig, @@ -547,6 +547,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: help="Disable async output processing. This may result in " "lower performance.") model_group.add_argument("--config-format", + metavar=",".join(get_args(ConfigFormat)), **model_kwargs["config_format"]) # This one is a special case because it can bool # or str. TODO: Handle this in get_kwargs From 22fb0a6f1f57b8f5959ca94c1be0a839115ad448 Mon Sep 17 00:00:00 2001 From: Xingyu Liu Date: Sun, 7 Sep 2025 20:30:06 -0700 Subject: [PATCH 11/12] update metavar Signed-off-by: Xingyu Liu --- vllm/engine/arg_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index b394144dfcdf..4468e269846a 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -546,9 +546,9 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: default=EngineArgs.disable_async_output_proc, help="Disable async output processing. This may result in " "lower performance.") - model_group.add_argument("--config-format", - metavar=",".join(get_args(ConfigFormat)), - **model_kwargs["config_format"]) + config_format_kwargs = model_kwargs["config_format"].copy() + config_format_kwargs["metavar"] = ",".join(get_args(ConfigFormat)) + model_group.add_argument("--config-format", **config_format_kwargs) # This one is a special case because it can bool # or str. 
TODO: Handle this in get_kwargs model_group.add_argument("--hf-token", From 6620df76ee230ca67a55944b9a3e85f7fa0f9f51 Mon Sep 17 00:00:00 2001 From: Xingyu Liu Date: Mon, 8 Sep 2025 10:01:27 -0700 Subject: [PATCH 12/12] update arg_utils Signed-off-by: Xingyu Liu --- vllm/engine/arg_utils.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 4468e269846a..e1fd65d08b4e 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -22,9 +22,9 @@ import vllm.envs as envs from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig, - ConfigFormat, ConfigType, ConvertOption, - DecodingConfig, DetailedTraceModules, Device, - DeviceConfig, DistributedExecutorBackend, EPLBConfig, + ConfigType, ConvertOption, DecodingConfig, + DetailedTraceModules, Device, DeviceConfig, + DistributedExecutorBackend, EPLBConfig, GuidedDecodingBackend, HfOverrides, KVEventsConfig, KVTransferConfig, LoadConfig, LogprobsMode, LoRAConfig, MambaDType, MMEncoderTPMode, ModelConfig, @@ -546,9 +546,8 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: default=EngineArgs.disable_async_output_proc, help="Disable async output processing. This may result in " "lower performance.") - config_format_kwargs = model_kwargs["config_format"].copy() - config_format_kwargs["metavar"] = ",".join(get_args(ConfigFormat)) - model_group.add_argument("--config-format", **config_format_kwargs) + model_group.add_argument("--config-format", + **model_kwargs["config_format"]) # This one is a special case because it can bool # or str. TODO: Handle this in get_kwargs model_group.add_argument("--hf-token",
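
---

For reference, a minimal sketch of how an out-of-tree plugin would consume the hook this series adds, assembled from the docstring in PATCH 04 and the test in PATCH 02. The MyFormatConfigParser class, the "my_format" name, and the stub config it returns are illustrative only, not part of the series:

# Hypothetical out-of-tree parser; register_config_parser and
# ConfigParserBase are the real APIs added by PATCH 01/02 above.
from pathlib import Path
from typing import Optional, Union

from transformers import PretrainedConfig

from vllm.transformers_utils.config import register_config_parser
from vllm.transformers_utils.config_parser_base import ConfigParserBase


@register_config_parser("my_format")
class MyFormatConfigParser(ConfigParserBase):

    def parse(self,
              model: Union[str, Path],
              trust_remote_code: bool,
              revision: Optional[str] = None,
              code_revision: Optional[str] = None,
              **kwargs) -> tuple[dict, PretrainedConfig]:
        # A real parser would read the plugin's on-disk format here and
        # adapt it into an HF-style config. This stub only shows the
        # contract: return both the raw dict and the config object.
        config_dict = {"model_type": "llama", "max_position_embeddings": 4096}
        return config_dict, PretrainedConfig(**config_dict)

With the parser registered at import time, passing a plain string such as --config-format my_format (the string form is what the Union[str, ConfigFormat] / Literal signatures in PATCH 01 and PATCH 09 exist to allow) would route get_config through the plugin instead of the built-in "hf" and "mistral" parsers.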