From 0ce0539d4750f9ebcd9b19d7085ca3b934b9ec67 Mon Sep 17 00:00:00 2001 From: Isotr0py <41363108+Isotr0py@users.noreply.github.com> Date: Sun, 7 Apr 2024 22:54:13 +0800 Subject: [PATCH] [Bugfix] Fix Llava inference with Tensor Parallelism. (#3883) --- vllm/executor/ray_gpu_executor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py index 24b3a8c18d92..43cb37cfb5e0 100644 --- a/vllm/executor/ray_gpu_executor.py +++ b/vllm/executor/ray_gpu_executor.py @@ -154,6 +154,7 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", scheduler_config = copy.deepcopy(self.scheduler_config) device_config = copy.deepcopy(self.device_config) lora_config = copy.deepcopy(self.lora_config) + vision_language_config = copy.deepcopy(self.vision_language_config) kv_cache_dtype = self.cache_config.cache_dtype # Initialize the actual workers with the Worker class. @@ -172,6 +173,7 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", rank, distributed_init_method, lora_config=lora_config, + vision_language_config=vision_language_config, kv_cache_dtype=kv_cache_dtype, ))