19 | 19 | from vllm.lora.request import LoRARequest |
20 | 20 | from vllm.lora.worker_manager import (LRUCacheWorkerLoRAManager, |
21 | 21 | WorkerLoRAManager) |
22 | | -from vllm.model_executor.layers.linear import RowParallelLinear |
23 | 22 | from vllm.platforms import current_platform |
24 | 23 |
25 | 24 | EMBEDDING_MODULES = { |
@@ -114,27 +113,28 @@ def create_packed_lora( |
114 | 113 |
115 | 114 | def test_replace_submodules(dist_init, dummy_model): |
116 | 115 | model = dummy_model |
117 | | - model.supported_lora_modules = ["dense1", "layer1.dense2"] |
118 | 116 | model.packed_modules_mapping = {} |
119 | 117 | manager = LoRAModelManager( |
120 | 118 | model, 1, 1, 1, |
121 | 119 | LoRAConfig(max_lora_rank=8, max_cpu_loras=8, max_loras=8), |
122 | 120 | torch.device(DEVICES[0])) |
123 | 121 | model = manager.model |
124 | | - |
125 | 122 | assert isinstance(model.get_submodule("dense1"), |
126 | 123 | ColumnParallelLinearWithLoRA) |
127 | 124 | assert isinstance(model.get_submodule("layer1.dense1"), |
128 | 125 | ColumnParallelLinearWithLoRA) |
129 | | - assert isinstance(model.get_submodule("dense2"), RowParallelLinear) |
| 126 | + assert isinstance(model.get_submodule("dense2"), RowParallelLinearWithLoRA) |
130 | 127 | assert isinstance(model.get_submodule("layer1.dense2"), |
131 | 128 | RowParallelLinearWithLoRA) |
132 | 129 |
133 | 130 |
134 | 131 | @pytest.mark.parametrize("device", DEVICES) |
135 | 132 | def test_lora_model_manager(dist_init, dummy_model, device): |
136 | 133 | model = dummy_model |
137 | | - model.supported_lora_modules = ["dense1", "dense2", "lm_head"] |
| 134 | + model.embedding_modules = {"lm_head": "lm_head"} |
| 135 | + model.packed_modules_mapping = {} |
| 136 | + model.embedding_modules = {} |
| 137 | + model.embedding_padding_modules = [] |
138 | 138 | model.packed_modules_mapping = {} |
139 | 139 | model_lora1 = create_lora(1, |
140 | 140 | model, ["layer1.dense1", "dense2", "lm_head"], |
@@ -195,7 +195,7 @@ def test_lora_model_manager(dist_init, dummy_model, device): |
195 | 195 | @pytest.mark.parametrize("device", DEVICES) |
196 | 196 | def test_lora_lru_cache_model_manager(dist_init, dummy_model, device): |
197 | 197 | model = dummy_model |
198 | | - model.supported_lora_modules = ["dense1", "dense2", "lm_head"] |
| 198 | + model.embedding_modules = {"lm_head": "lm_head"} |
199 | 199 | model.packed_modules_mapping = {} |
200 | 200 | model_lora1 = create_lora(1, |
201 | 201 | model, ["layer1.dense1", "dense2", "lm_head"], |
@@ -289,7 +289,7 @@ def test_lru_lora_model_manager(dist_init, dummy_model, device): |
289 | 289 | # This tests just the LRU cache functionality, everything else is |
290 | 290 | # tested in test_lora_model_manager |
291 | 291 | model = dummy_model |
292 | | - model.supported_lora_modules = ["dense1", "dense2", "lm_head"] |
| 292 | + model.embedding_modules = {"lm_head": "lm_head"} |
293 | 293 | model.packed_modules_mapping = {} |
294 | 294 | model_lora1 = create_lora(1, |
295 | 295 | model, ["layer1.dense1", "dense2", "lm_head"], |
@@ -572,7 +572,6 @@ def test_worker_adapter_manager(llama_2_7b_model_extra_embeddings, |
572 | 572 | @pytest.mark.parametrize("device", DEVICES) |
573 | 573 | def test_packed_loras(dist_init, dummy_model_gate_up, device): |
574 | 574 | model = dummy_model_gate_up |
575 | | - model.supported_lora_modules = ["gate_up_proj"] |
576 | 575 | model.packed_modules_mapping = { |
577 | 576 | "gate_up_proj": [ |
578 | 577 | "gate_proj", |
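For reference, below is a sketch of how the reworked `test_replace_submodules` reads after this change, reconstructed from the first hunk above rather than copied from the file. It assumes the `dist_init` and `dummy_model` pytest fixtures and the `DEVICES` constant defined elsewhere in this test module; the point of the hunk is that both `dense2` submodules are now wrapped with `RowParallelLinearWithLoRA` without the model declaring `supported_lora_modules`.

```python
# Sketch reconstructed from the hunk above (not the verbatim file contents).
# dist_init and dummy_model are pytest fixtures, and DEVICES is a constant
# assumed to be defined elsewhere in this test module.
import torch

from vllm.config import LoRAConfig
from vllm.lora.layers import (ColumnParallelLinearWithLoRA,
                              RowParallelLinearWithLoRA)
from vllm.lora.models import LoRAModelManager


def test_replace_submodules(dist_init, dummy_model):
    model = dummy_model
    model.packed_modules_mapping = {}
    manager = LoRAModelManager(
        model, 1, 1, 1,
        LoRAConfig(max_lora_rank=8, max_cpu_loras=8, max_loras=8),
        torch.device(DEVICES[0]))
    model = manager.model
    # Per this change, LoRA-capable layers are detected from their types
    # rather than from a per-model supported_lora_modules list, so the
    # dense2 submodules are wrapped as well.
    assert isinstance(model.get_submodule("dense1"),
                      ColumnParallelLinearWithLoRA)
    assert isinstance(model.get_submodule("layer1.dense1"),
                      ColumnParallelLinearWithLoRA)
    assert isinstance(model.get_submodule("dense2"),
                      RowParallelLinearWithLoRA)
    assert isinstance(model.get_submodule("layer1.dense2"),
                      RowParallelLinearWithLoRA)
```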