Skip to content

Commit 112779f

Browse files
authored
Merge pull request vllm-project#7 from prashanth058/mlm-connector-support
add connector support
2 parents 5c156c9 + a69bde7 commit 112779f

File tree

11 files changed

+315
-123
lines changed

11 files changed

+315
-123
lines changed

tests/lora/conftest.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,21 @@ def qwen25vl_lora_files():
225225
return snapshot_download(repo_id="jeeejeee/qwen25-vl-lora-pokemon")
226226

227227

228+
@pytest.fixture(scope="session")
229+
def qwen2vl_language_lora_files():
230+
return snapshot_download(repo_id="prashanth058/qwen2vl-flickr-lora-language")
231+
232+
233+
@pytest.fixture(scope="session")
234+
def qwen2vl_vision_tower_connector_lora_files():
235+
return snapshot_download(repo_id="prashanth058/qwen2vl-flickr-lora-tower-connector")
236+
237+
238+
@pytest.fixture(scope="session")
239+
def qwen2vl_vision_tower_lora_files():
240+
return snapshot_download(repo_id="prashanth058/qwen2vl-flickr-lora-tower")
241+
242+
228243
@pytest.fixture(scope="session")
229244
def tinyllama_lora_files():
230245
return snapshot_download(repo_id="jashing/tinyllama-colorist-lora")

tests/lora/test_qwen2vl.py

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ def run_test(
7979
lora_request = LoRARequest(str(lora_id), lora_id, self.config.lora_path)
8080
outputs = self.llm.generate(inputs, sampling_params, lora_request=lora_request)
8181
generated_texts = [output.outputs[0].text.strip() for output in outputs]
82-
8382
# Validate outputs
8483
for generated, expected in zip(generated_texts, expected_outputs):
8584
assert expected.startswith(generated), (
@@ -130,6 +129,22 @@ def run_beam_search_test(
130129
"A majestic skyscraper stands tall, partially obscured by a vibrant canopy of cherry blossoms, against a clear blue sky.", # noqa: E501
131130
]
132131

132+
EXPECTED_OUTPUTS_LANGUAGE = [
133+
"A stop sign is shown in an Asian city, with buildings and a car in the "
134+
"background.",
135+
"The Tokyo Skytree can be seen behind the pink blossoms of the cherry trees.",
136+
]
137+
138+
EXPECTED_OUTPUTS_VISION = [
139+
"A stop sign in front of oriental buildings.",
140+
"A tree with pink flowers in front of it and a blue sky behind the flowers.",
141+
]
142+
143+
EXPECTED_OUTPUTS_VISION_NO_CONNECTOR = [
144+
"A stop sign is located on the street of a Chinese neighborhood.",
145+
"A closeup shot of the Tokyo Skytree with pink flowers in the foreground.",
146+
]
147+
133148
# NOTE - beam search .text contains the whole text
134149
EXPECTED_BEAM_SEARCH_OUTPUTS = [
135150
[
@@ -190,3 +205,64 @@ def test_qwen25vl_lora(qwen25vl_lora_files):
190205
# Test with different LoRA IDs
191206
for lora_id in [1, 2]:
192207
tester.run_test(TEST_IMAGES, expected_outputs=EXPECTED_OUTPUTS, lora_id=lora_id)
208+
209+
210+
@pytest.mark.xfail(
211+
current_platform.is_rocm(),
212+
reason="Qwen2-VL dependency xformers incompatible with ROCm",
213+
)
214+
def test_qwen2vl_language_lora(qwen2vl_language_lora_files):
215+
"""
216+
Test language-only LoRA adapter.
217+
"""
218+
config = TestConfig(
219+
model_path=QWEN2VL_MODEL_PATH, lora_path=qwen2vl_language_lora_files
220+
)
221+
tester = Qwen2VLTester(config)
222+
for lora_id in [1, 2]:
223+
tester.run_test(
224+
TEST_IMAGES, expected_outputs=EXPECTED_OUTPUTS_LANGUAGE, lora_id=lora_id
225+
)
226+
227+
228+
@pytest.mark.xfail(
229+
current_platform.is_rocm(),
230+
reason="Qwen2-VL dependency xformers incompatible with ROCm",
231+
)
232+
def test_qwen2vl_vision_lora(qwen2vl_vision_tower_connector_lora_files):
233+
"""
234+
Test vision tower + connector LoRA adapter.
235+
"""
236+
config = TestConfig(
237+
model_path=QWEN2VL_MODEL_PATH,
238+
lora_path=qwen2vl_vision_tower_connector_lora_files,
239+
)
240+
tester = Qwen2VLTester(config)
241+
for lora_id in [1, 2]:
242+
tester.run_test(
243+
TEST_IMAGES, expected_outputs=EXPECTED_OUTPUTS_VISION, lora_id=lora_id
244+
)
245+
246+
247+
@pytest.mark.xfail(
248+
current_platform.is_rocm(),
249+
reason="Qwen2-VL dependency xformers incompatible with ROCm",
250+
)
251+
def test_qwen2vl_vision_no_connector_lora(
252+
qwen2vl_vision_tower_lora_files,
253+
):
254+
"""
255+
Test vision tower only LoRA adapter.
256+
257+
"""
258+
config = TestConfig(
259+
model_path=QWEN2VL_MODEL_PATH,
260+
lora_path=qwen2vl_vision_tower_lora_files,
261+
)
262+
tester = Qwen2VLTester(config)
263+
for lora_id in [1, 2]:
264+
tester.run_test(
265+
TEST_IMAGES,
266+
expected_outputs=EXPECTED_OUTPUTS_VISION_NO_CONNECTOR,
267+
lora_id=lora_id,
268+
)

vllm/lora/layers/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
RowParallelLinearWithLoRA,
1818
RowParallelLinearWithShardedLoRA,
1919
)
20-
from vllm.lora.layers.utils import LoRAMapping
20+
from vllm.lora.layers.utils import LoRAMapping, LoRAMappingType
2121
from vllm.lora.layers.vocal_parallel_embedding import VocabParallelEmbeddingWithLoRA
2222

2323
__all__ = [
@@ -36,4 +36,5 @@
3636
"RowParallelLinearWithShardedLoRA",
3737
"ReplicatedLinearWithLoRA",
3838
"LoRAMapping",
39+
"LoRAMappingType",
3940
]

vllm/lora/layers/row_parallel_linear.py

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -63,22 +63,25 @@ def forward(
6363
input_parallel = splitted_input[self.tp_rank].contiguous()
6464

6565
# Matrix multiply.
66-
output_parallel = self.apply(input_parallel)
66+
# Only fuse bias add into GEMM for rank 0 (matches base
67+
# RowParallelLinear behavior). This ensures bias will not get
68+
# added more than once in TP>1 case and matches the numerical
69+
# behavior of the unwrapped layer
70+
bias_ = (
71+
None
72+
if (self.tp_rank > 0 or self.base_layer.skip_bias_add)
73+
else self.base_layer.bias
74+
)
75+
output_parallel = self.apply(input_parallel, bias_)
76+
6777
if self.base_layer.reduce_results and self.tp_size > 1:
6878
output_ = tensor_model_parallel_all_reduce(output_parallel)
6979
else:
7080
output_ = output_parallel
7181

72-
if not self.base_layer.skip_bias_add:
73-
output = (
74-
output_ + self.base_layer.bias
75-
if self.base_layer.bias is not None
76-
else output_
77-
)
78-
output_bias = None
79-
else:
80-
output = output_
81-
output_bias = self.base_layer.bias
82+
# Bias was already added by rank 0 in apply(), no need to add again
83+
output_bias = self.base_layer.bias if self.base_layer.skip_bias_add else None
84+
output = output_
8285

8386
if not self.base_layer.return_bias:
8487
return output

vllm/lora/layers/utils.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,24 @@
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

44
from dataclasses import dataclass
5+
from enum import Enum
56

67
import torch
78
import torch.nn as nn
89

910

11+
class LoRAMappingType(Enum):
12+
LANGUAGE = 1
13+
TOWER = 2
14+
CONNECTOR = 3
15+
16+
1017
@dataclass
1118
class LoRAMapping:
1219
index_mapping: tuple[int, ...]
1320
prompt_mapping: tuple[int, ...]
1421
is_prefill: bool = False
15-
is_mm_input: bool = False
22+
type: LoRAMappingType = LoRAMappingType.LANGUAGE
1623

1724
def __post_init__(self):
1825
self.index_mapping = tuple(self.index_mapping)

0 commit comments

Comments (0)