diff --git a/vllm/config.py b/vllm/config.py
index d9e4a619ee010..54f36e1d66783 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1092,6 +1092,8 @@ def verify_with_scheduler_config(self, scheduler_config: SchedulerConfig):
                 "Due to limitations of the custom LoRA CUDA kernel, "
                 "max_num_batched_tokens must be <= 65528 when "
                 "LoRA is enabled.")
+        if scheduler_config.chunked_prefill_enabled:
+            raise ValueError("LoRA is not supported with chunked prefill yet.")
 
 
 @dataclass
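The guard rejects the LoRA + chunked prefill combination at config-verification time rather than failing later in the kernels. A minimal sketch of how it is expected to fire is below; the `SchedulerConfig` and `LoRAConfig` constructor arguments shown are assumptions inferred from the fields this diff references (`chunked_prefill_enabled`) and may differ across vLLM versions.

```python
# Hedged sketch: triggering the new validation error.
# Constructor arguments are assumptions, not confirmed signatures.
from vllm.config import LoRAConfig, SchedulerConfig

scheduler_config = SchedulerConfig(
    max_num_batched_tokens=512,
    max_num_seqs=64,
    max_model_len=2048,
    enable_chunked_prefill=True,  # assumption: this flag sets
                                  # chunked_prefill_enabled on the config
)
lora_config = LoRAConfig(max_lora_rank=8, max_loras=1)

# With chunked prefill enabled, the added check should raise:
#   ValueError: LoRA is not supported with chunked prefill yet.
lora_config.verify_with_scheduler_config(scheduler_config)
```

Raising in `verify_with_scheduler_config` keeps the restriction next to the existing `max_num_batched_tokens` check, so all scheduler-related LoRA constraints surface from one place during engine setup.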