Merged
13 changes: 9 additions & 4 deletions vllm/config.py
@@ -54,13 +54,15 @@
     from vllm.transformers_utils.tokenizer_group.base_tokenizer_group import (
         BaseTokenizerGroup)
 
-    Config = TypeVar("Config", bound=DataclassInstance)
+    ConfigType = type[DataclassInstance]
 else:
     QuantizationConfig = None
-    Config = TypeVar("Config")
+    ConfigType = type
 
 logger = init_logger(__name__)
 
+ConfigT = TypeVar("ConfigT", bound=ConfigType)
+
 # This value is chosen to have a balance between ITL and TTFT. Note it is
 # not optimized for throughput.
 _DEFAULT_MAX_NUM_BATCHED_TOKENS = 2048
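For context, a minimal runnable sketch of the pattern this hunk introduces (the names mirror the diff; the comments are editorial). `DataclassInstance` exists only in the `_typeshed` stubs, so the precise alias is visible to type checkers only, with a plain `type` standing in at runtime:

```python
# Minimal sketch of the TYPE_CHECKING pattern in the hunk above.
# `_typeshed` is a stub-only module and must not be imported at runtime.
from typing import TYPE_CHECKING, TypeVar

if TYPE_CHECKING:
    from _typeshed import DataclassInstance
    ConfigType = type[DataclassInstance]
else:
    # At runtime the alias degrades to a plain `type`; the precise
    # bound only matters to static type checkers.
    ConfigType = type

# ConfigT ranges over config *classes* (not instances), so a function
# annotated `(cls: ConfigT) -> ConfigT` preserves the exact class type.
ConfigT = TypeVar("ConfigT", bound=ConfigType)
```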
@@ -162,7 +164,7 @@ def pairwise(iterable):
     return out
 
 
-def config(cls: type[Config]) -> type[Config]:
+def config(cls: ConfigT) -> ConfigT:
     """
     A decorator that ensures all fields in a dataclass have default values
     and that each field has a docstring.
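The signature change matters because the old `Config` TypeVar was bound to an instance type and then wrapped in `type[...]`, whereas `ConfigT` is bound to the class type directly, letting the decorator be typed as an identity on classes. A hedged sketch of the effect (the validation body is illustrative, not vLLM's implementation):

```python
# Sketch only: shows why `(cls: ConfigT) -> ConfigT` keeps the concrete
# class type for callers of the decorated class.
from dataclasses import MISSING, dataclass, fields, is_dataclass
from typing import TypeVar

ConfigT = TypeVar("ConfigT", bound=type)

def config(cls: ConfigT) -> ConfigT:
    # Illustrative stand-in for the decorator documented above: check
    # that every field has a default, then return the class unchanged.
    assert is_dataclass(cls), f"{cls.__name__} must be a dataclass"
    for f in fields(cls):
        assert f.default is not MISSING or f.default_factory is not MISSING, \
            f"field {f.name!r} has no default value"
    return cls

@config
@dataclass
class DummyConfig:  # hypothetical example class
    block_size: int = 16

print(DummyConfig(block_size=32))  # -> DummyConfig(block_size=32)
```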
@@ -181,7 +183,7 @@ def config(cls: type[Config]) -> type[Config]:
     return cls
 
 
-def get_field(cls: type[Config], name: str) -> Field:
+def get_field(cls: ConfigType, name: str) -> Field:
     """Get the default factory field of a dataclass by name. Used for getting
     default factory fields in `EngineArgs`."""
     if not is_dataclass(cls):
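A sketch of what a helper with this signature can do; only the signature and docstring come from the diff, the body below is an assumption rather than the vLLM implementation:

```python
# Assumed implementation sketch of a get_field-style helper.
from dataclasses import MISSING, Field, field, fields, is_dataclass

def get_field(cls: type, name: str) -> Field:
    # Find the named dataclass field and return a fresh Field reusing its
    # default (factory), e.g. so EngineArgs can adopt it as its own default.
    if not is_dataclass(cls):
        raise TypeError(f"{cls.__name__} is not a dataclass")
    for f in fields(cls):
        if f.name == name:
            if f.default_factory is not MISSING:
                return field(default_factory=f.default_factory)
            return field(default=f.default)
    raise ValueError(f"{cls.__name__} has no field named {name!r}")
```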
@@ -2747,6 +2749,9 @@ class MultiModalConfig:
     The maximum number of input items allowed per prompt for each modality.
     This should be a JSON string that will be parsed into a dictionary.
     Defaults to 1 (V0) or 999 (V1) for each modality.
+
+    For example, to allow up to 16 images and 2 videos per prompt:
+    ``{"images": 16, "videos": 2}``
     """
 
     def compute_hash(self) -> str:
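A hedged usage sketch of the documented limit: the same mapping can be supplied programmatically when constructing the engine (the model name is a placeholder; the modality keys follow the docstring above):

```python
from vllm import LLM

llm = LLM(
    model="llava-hf/llava-1.5-7b-hf",  # placeholder model choice
    # Allow up to 16 images and 2 videos in a single prompt, mirroring
    # the JSON example in the docstring above.
    limit_mm_per_prompt={"images": 16, "videos": 2},
)
```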
14 changes: 8 additions & 6 deletions vllm/engine/arg_utils.py
@@ -17,7 +17,7 @@
 import vllm.envs as envs
 from vllm import version
 from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig,
-                         Config, ConfigFormat, DecodingConfig, Device,
+                         ConfigFormat, ConfigType, DecodingConfig, Device,
                          DeviceConfig, DistributedExecutorBackend, HfOverrides,
                          KVTransferConfig, LoadConfig, LoadFormat, LoRAConfig,
                          ModelConfig, ModelImpl, MultiModalConfig,
@@ -304,7 +304,7 @@ def is_custom_type(cls: TypeHint) -> bool:
             """Check if the class is a custom type."""
             return cls.__module__ != "builtins"
 
-        def get_kwargs(cls: type[Config]) -> dict[str, Any]:
+        def get_kwargs(cls: ConfigType) -> dict[str, Any]:
             cls_docs = get_attr_docs(cls)
             kwargs = {}
             for field in fields(cls):
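For orientation, a simplified sketch of what a `get_kwargs`-style helper does. The real helper also reads per-field docstrings via `get_attr_docs`; this body is an assumption:

```python
# Assumed sketch: map dataclass fields to argparse-style kwargs,
# materializing default factories into concrete defaults.
from dataclasses import MISSING, fields
from typing import Any

def get_kwargs(cls: type) -> dict[str, Any]:
    kwargs: dict[str, Any] = {}
    for f in fields(cls):
        default = (f.default_factory()
                   if f.default_factory is not MISSING else f.default)
        kwargs[f.name] = {"default": default, "type": f.type}
    return kwargs
```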
@@ -678,13 +678,15 @@ def get_kwargs(cls: type[Config]) -> dict[str, Any]:
             '--mm-processor-kwargs',
             default=None,
             type=json.loads,
-            help=('Overrides for the multimodal input mapping/processing, '
-                  'e.g., image processor. For example: ``{"num_crops": 4}``.'))
+            help=('Overrides for the multi-modal processor obtained from '
+                  '``AutoProcessor.from_pretrained``. The available overrides '
+                  'depend on the model that is being run. '
+                  'For example, for Phi-3-Vision: ``{"num_crops": 4}``.'))
         parser.add_argument(
             '--disable-mm-preprocessor-cache',
             action='store_true',
-            help='If true, then disables caching of the multi-modal '
-                 'preprocessor/mapper. (not recommended)')
+            help='If True, disable caching of the processed multi-modal '
+                 'inputs.')
 
         # LoRA related configs
         parser.add_argument('--enable-lora',
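The two flags documented above also surface as engine arguments; a hedged usage sketch (the model and override values are placeholders, and `num_crops` is the Phi-3-Vision example from the help text, not a universally available option):

```python
from vllm import LLM

llm = LLM(
    model="microsoft/Phi-3-vision-128k-instruct",  # placeholder choice
    # Extra kwargs forwarded to the AutoProcessor for this model:
    mm_processor_kwargs={"num_crops": 4},
    # Programmatic equivalent of --disable-mm-preprocessor-cache:
    disable_mm_preprocessor_cache=True,
)
```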