From d98edcdeee308aa6a4d509c4405ad5ec4ca694ea Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 29 May 2025 00:00:40 +0800 Subject: [PATCH 1/2] Skip device validation to make plugin device work Signed-off-by: Yikun Jiang --- vllm/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/config.py b/vllm/config.py index 4d9ca580f39d..7f72c79033b5 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -2231,7 +2231,7 @@ def is_multi_step(self) -> bool: class DeviceConfig: """Configuration for the device to use for vLLM execution.""" - device: Union[Device, torch.device] = "auto" + device: SkipValidation[Union[Device, torch.device]] = "auto" """Device type for vLLM execution. This parameter is deprecated and will be removed in a future release. From 47e83f25fc04769953cb09ec84423a62337c895f Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 29 May 2025 11:02:53 +0800 Subject: [PATCH 2/2] Skip quantization validation Signed-off-by: Yikun Jiang --- vllm/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/config.py b/vllm/config.py index 7f72c79033b5..25087d9d1cd4 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -304,7 +304,7 @@ class ModelConfig: - 25.6k -> 25,600""" spec_target_max_model_len: Optional[int] = None """Specify the maximum length for spec decoding draft models.""" - quantization: Optional[QuantizationMethods] = None + quantization: SkipValidation[Optional[QuantizationMethods]] = None """Method used to quantize the weights. If `None`, we first check the `quantization_config` attribute in the model config file. If that is `None`, we assume the model weights are not quantized and use `dtype` to