Commit e12c3db

Modify unit test

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>

1 parent eb2bab4 · commit e12c3db

3 files changed: 27 additions, 10 deletions

tests/lora/test_lora_manager.py (7 additions, 8 deletions)

@@ -19,7 +19,6 @@
 from vllm.lora.request import LoRARequest
 from vllm.lora.worker_manager import (LRUCacheWorkerLoRAManager,
                                       WorkerLoRAManager)
-from vllm.model_executor.layers.linear import RowParallelLinear
 from vllm.platforms import current_platform

 EMBEDDING_MODULES = {
@@ -114,27 +113,28 @@ def create_packed_lora(

 def test_replace_submodules(dist_init, dummy_model):
     model = dummy_model
-    model.supported_lora_modules = ["dense1", "layer1.dense2"]
     model.packed_modules_mapping = {}
     manager = LoRAModelManager(
         model, 1, 1, 1,
         LoRAConfig(max_lora_rank=8, max_cpu_loras=8, max_loras=8),
         torch.device(DEVICES[0]))
     model = manager.model
-
     assert isinstance(model.get_submodule("dense1"),
                       ColumnParallelLinearWithLoRA)
     assert isinstance(model.get_submodule("layer1.dense1"),
                       ColumnParallelLinearWithLoRA)
-    assert isinstance(model.get_submodule("dense2"), RowParallelLinear)
+    assert isinstance(model.get_submodule("dense2"), RowParallelLinearWithLoRA)
     assert isinstance(model.get_submodule("layer1.dense2"),
                       RowParallelLinearWithLoRA)


 @pytest.mark.parametrize("device", DEVICES)
 def test_lora_model_manager(dist_init, dummy_model, device):
     model = dummy_model
-    model.supported_lora_modules = ["dense1", "dense2", "lm_head"]
+    model.embedding_modules = {"lm_head": "lm_head"}
+    model.packed_modules_mapping = {}
+    model.embedding_modules = {}
+    model.embedding_padding_modules = []
     model.packed_modules_mapping = {}
     model_lora1 = create_lora(1,
                               model, ["layer1.dense1", "dense2", "lm_head"],
@@ -195,7 +195,7 @@ def test_lora_model_manager(dist_init, dummy_model, device):
 @pytest.mark.parametrize("device", DEVICES)
 def test_lora_lru_cache_model_manager(dist_init, dummy_model, device):
     model = dummy_model
-    model.supported_lora_modules = ["dense1", "dense2", "lm_head"]
+    model.embedding_modules = {"lm_head": "lm_head"}
     model.packed_modules_mapping = {}
     model_lora1 = create_lora(1,
                               model, ["layer1.dense1", "dense2", "lm_head"],
@@ -289,7 +289,7 @@ def test_lru_lora_model_manager(dist_init, dummy_model, device):
     # This tests just the LRU cache functionality, everything else is
     # tested in test_lora_model_manager
     model = dummy_model
-    model.supported_lora_modules = ["dense1", "dense2", "lm_head"]
+    model.embedding_modules = {"lm_head": "lm_head"}
     model.packed_modules_mapping = {}
     model_lora1 = create_lora(1,
                               model, ["layer1.dense1", "dense2", "lm_head"],
@@ -572,7 +572,6 @@ def test_worker_adapter_manager(llama_2_7b_model_extra_embeddings,
 @pytest.mark.parametrize("device", DEVICES)
 def test_packed_loras(dist_init, dummy_model_gate_up, device):
     model = dummy_model_gate_up
-    model.supported_lora_modules = ["gate_up_proj"]
     model.packed_modules_mapping = {
         "gate_up_proj": [
             "gate_proj",

vllm/lora/models.py (3 additions, 2 deletions)

@@ -25,7 +25,8 @@
 from vllm.lora.lora import LoRALayerWeights, PackedLoRALayerWeights
 from vllm.lora.peft_helper import PEFTHelper
 from vllm.lora.punica_wrapper import get_punica_wrapper
-from vllm.lora.utils import (from_layer, from_layer_logits_processor,
+from vllm.lora.utils import (check_lora_embedding, from_layer,
+                             from_layer_logits_processor,
                              get_supported_lora_modules,
                              is_regex_target_modules,
                              parse_fine_tuned_lora_name, replace_submodule)
@@ -333,7 +334,7 @@ def __init__(
         # Used for long context lora.
         self.scaling_factor_to_offset: Dict[float, int] = {}
         super().__init__(model)
-
+        check_lora_embedding(self.model)
         self.supported_lora_modules = get_supported_lora_modules(self.model)
         if lora_config.long_lora_scaling_factors:
             # We need to replace rotary emb layer to do batch computation
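
The ordering of the added call is the point of this change: check_lora_embedding normalizes the model before get_supported_lora_modules inspects it. A condensed sketch of that initialization order follows; only the two vllm.lora.utils helpers come from the diff, the wrapper function is hypothetical and the real LoRAModelManager.__init__ does much more:

    # Hypothetical, condensed view of the __init__ ordering introduced here.
    from torch import nn

    from vllm.lora.utils import check_lora_embedding, get_supported_lora_modules


    def _init_lora_support(model: nn.Module) -> list:
        # 1. Guarantee embedding_modules / embedding_padding_modules exist.
        check_lora_embedding(model)
        # 2. Only then collect the LoRA-capable (linear) modules.
        return get_supported_lora_modules(model)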

vllm/lora/utils.py (17 additions, 0 deletions)

@@ -171,7 +171,24 @@ def is_subset(sub_list, full_list):
     return False


+def check_lora_embedding(model: nn.Module) -> None:
+    """
+    This function ensures that the model has the attributes `embedding_modules`
+    and `embedding_padding_modules`. If these attributes do not exist, they are
+    initialized to an empty dictionary and an empty list, respectively. The
+    purpose of doing this is to ensure compatibility with subsequent LoRA
+    processing logic.
+    """
+    if not hasattr(model, "embedding_modules"):
+        model.embedding_modules = {}
+    if not hasattr(model, "embedding_padding_modules"):
+        model.embedding_padding_modules = []
+
+
 def get_supported_lora_modules(model: nn.Module) -> List[str]:
+    """
+    In vLLM, all linear layers support LoRA.
+    """
     supported_lora_modules: Set[str] = set()
     # step1: traverse the model to get all the linear subfixes.
     for name, module in model.named_modules():
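
The docstring above describes the behavior; a short usage sketch makes it concrete. BareModel below is hypothetical and not part of vLLM, only check_lora_embedding comes from the diff:

    # Hypothetical usage sketch: defaults get filled in on a model that never
    # declared the embedding attributes.
    import torch.nn as nn

    from vllm.lora.utils import check_lora_embedding


    class BareModel(nn.Module):

        def __init__(self):
            super().__init__()
            self.dense = nn.Linear(8, 8)


    model = BareModel()
    check_lora_embedding(model)
    assert model.embedding_modules == {}           # added as an empty dict
    assert model.embedding_padding_modules == []   # added as an empty list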
