diff --git a/vllm/executor/neuron_executor.py b/vllm/executor/neuron_executor.py index ee8e87432fa67..d45f18e466256 100644 --- a/vllm/executor/neuron_executor.py +++ b/vllm/executor/neuron_executor.py @@ -25,6 +25,7 @@ def __init__( speculative_config: Optional[SpeculativeConfig], ) -> None: self.model_config = model_config + self.cache_config = cache_config assert lora_config is None, "LoRA is not supported for Neuron backend." self.parallel_config = parallel_config self.scheduler_config = scheduler_config @@ -43,6 +44,7 @@ def _init_worker(self): self.parallel_config, self.scheduler_config, self.device_config, + self.cache_config, ) self.driver_worker.init_device() self.driver_worker.load_model()