diff --git a/vllm/config.py b/vllm/config.py
index a3ed94bc50f8..82f575cbcf93 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -54,13 +54,15 @@
     from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
         BaseTokenizerGroup)
 
-    Config = TypeVar("Config", bound=DataclassInstance)
+    ConfigType = type[DataclassInstance]
 else:
     QuantizationConfig = None
-    Config = TypeVar("Config")
+    ConfigType = type
 
 logger = init_logger(__name__)
 
+ConfigT = TypeVar("ConfigT", bound=ConfigType)
+
 # This value is chosen to have a balance between ITL and TTFT. Note it is
 # not optimized for throughput.
 _DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048
@@ -162,7 +164,7 @@ def pairwise(iterable):
     return out
 
 
-def config(cls: type[Config]) -> type[Config]:
+def config(cls: ConfigT) -> ConfigT:
     """
     A decorator that ensures all fields in a dataclass have default values
     and that each field has a docstring.
@@ -181,7 +183,7 @@ def config(cls: type[Config]) -> type[Config]:
     return cls
 
 
-def get_field(cls: type[Config], name: str) -> Field:
+def get_field(cls: ConfigType, name: str) -> Field:
     """Get the default factory field of a dataclass by name. Used for getting
     default factory fields in `EngineArgs`."""
     if not is_dataclass(cls):
@@ -2747,6 +2749,9 @@ class MultiModalConfig:
     The maximum number of input items allowed per prompt for each modality.
     This should be a JSON string that will be parsed into a dictionary.
     Defaults to 1 (V0) or 999 (V1) for each modality.
+
+    For example, to allow up to 16 images and 2 videos per prompt:
+    ``{"images": 16, "videos": 2}``
     """
 
     def compute_hash(self) -> str:
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 06529ae25a83..61d9eee3c219 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -17,7 +17,7 @@
 import vllm.envs as envs
 from vllm import version
 from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
-                         Config, ConfigFormat, DecodingConfig, Device,
+                         ConfigFormat, ConfigType, DecodingConfig, Device,
                          DeviceConfig, DistributedExecutorBackend, HfOverrides,
                          KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
                          ModelConfig, ModelImpl, MultiModalConfig,
@@ -304,7 +304,7 @@ def is_custom_type(cls: TypeHint) -> bool:
             """Check if the class is a custom type."""
             return cls.__module__ != "builtins"
 
-        def get_kwargs(cls: type[Config]) -> dict[str, Any]:
+        def get_kwargs(cls: ConfigType) -> dict[str, Any]:
             cls_docs = get_attr_docs(cls)
             kwargs = {}
             for field in fields(cls):
@@ -678,13 +678,15 @@ def get_kwargs(cls: type[Config]) -> dict[str, Any]:
             '--mm-processor-kwargs',
             default=None,
             type=json.loads,
-            help=('Overrides for the multimodal input mapping/processing, '
-                  'e.g., image processor. For example: ``{"num_crops": 4}``.'))
+            help=('Overrides for the multi-modal processor obtained from '
+                  '``AutoProcessor.from_pretrained``. The available overrides '
+                  'depend on the model that is being run. '
+                  'For example, for Phi-3-Vision: ``{"num_crops": 4}``.'))
 
         parser.add_argument(
             '--disable-mm-preprocessor-cache',
             action='store_true',
-            help='If true, then disables caching of the multi-modal '
-            'preprocessor/mapper. (not recommended)')
+            help='If True, disable caching of the processed multi-modal '
+            'inputs.')
         # LoRA related configs
         parser.add_argument('--enable-lora',
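
For context, here is a minimal, self-contained sketch (not the actual vLLM code) of the typing pattern this diff introduces: `ConfigType` is a plain alias meaning "any config class", used where a function only inspects the class, while `ConfigT` is a TypeVar bound to it, used where the same class must be returned to the caller (as in the `config` decorator) so the concrete type is preserved. `DummyConfig` and the simplified function bodies are hypothetical illustrations only.

```python
# Sketch of the ConfigType / ConfigT pattern; assumes simplified stand-ins
# for the real vLLM helpers.
from dataclasses import MISSING, Field, dataclass, field, fields, is_dataclass
from typing import TypeVar

ConfigType = type                              # "any config class"
ConfigT = TypeVar("ConfigT", bound=ConfigType)  # "this specific config class"


def config(cls: ConfigT) -> ConfigT:
    """Decorator sketch: callers keep the concrete class type (ConfigT in, ConfigT out)."""
    assert is_dataclass(cls), f"{cls!r} must be a dataclass"
    return cls


def get_field(cls: ConfigType, name: str) -> Field:
    """Accepts any config class; the return type does not depend on which one."""
    for f in fields(cls):
        if f.name == name:
            return f
    raise ValueError(f"{name!r} is not a field of {cls.__name__}")


@config
@dataclass
class DummyConfig:  # hypothetical example class
    limits: dict[str, int] = field(default_factory=dict)


limits_field = get_field(DummyConfig, "limits")
assert limits_field.default is MISSING          # no plain default ...
print(limits_field.default_factory())           # ... only a factory -> {}
```

The TypeVar is only needed where the output type depends on the input class; everywhere else the plain `ConfigType` alias keeps signatures simpler, which is why the diff uses `ConfigT` for `config()` but `ConfigType` for `get_field()` and `get_kwargs()`.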