[Bugfix] Fix Llava inference with Tensor Parallelism. (#3883)

vllm-project · Apr 7, 2024 · 0ce0539
1 parent 2f19283
commit 0ce0539
Showing 1 changed file with 2 additions and 0 deletions.
diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py
@@ -154,6 +154,7 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
  scheduler_config = copy.deepcopy(self.scheduler_config)
  device_config = copy.deepcopy(self.device_config)
  lora_config = copy.deepcopy(self.lora_config)
+ vision_language_config = copy.deepcopy(self.vision_language_config)
  kv_cache_dtype = self.cache_config.cache_dtype
 
  # Initialize the actual workers with the Worker class.
@@ -172,6 +173,7 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
  rank,
  distributed_init_method,
  lora_config=lora_config,
+ vision_language_config=vision_language_config,
  kv_cache_dtype=kv_cache_dtype,
  ))