 from vllm.lora.request import LoRARequest
 from vllm.lora.worker_manager import (LRUCacheWorkerLoRAManager,
                                       WorkerLoRAManager)
-from vllm.model_executor.layers.linear import RowParallelLinear
 from vllm.platforms import current_platform

 EMBEDDING_MODULES = {
@@ -114,28 +113,23 @@ def create_packed_lora(


 def test_replace_submodules(dist_init, dummy_model):
     model = dummy_model
-    model.supported_lora_modules = ["dense1", "layer1.dense2"]
-    model.packed_modules_mapping = {}
     manager = LoRAModelManager(
         model, 1, 1, 1,
         LoRAConfig(max_lora_rank=8, max_cpu_loras=8, max_loras=8),
         torch.device(DEVICES[0]))
     model = manager.model
-
     assert isinstance(model.get_submodule("dense1"),
                       ColumnParallelLinearWithLoRA)
     assert isinstance(model.get_submodule("layer1.dense1"),
                       ColumnParallelLinearWithLoRA)
-    assert isinstance(model.get_submodule("dense2"), RowParallelLinear)
+    assert isinstance(model.get_submodule("dense2"), RowParallelLinearWithLoRA)
     assert isinstance(model.get_submodule("layer1.dense2"),
                       RowParallelLinearWithLoRA)


 @pytest.mark.parametrize("device", DEVICES)
 def test_lora_model_manager(dist_init, dummy_model, device):
     model = dummy_model
-    model.supported_lora_modules = ["dense1", "dense2", "lm_head"]
-    model.packed_modules_mapping = {}
     model_lora1 = create_lora(1,
                               model, ["layer1.dense1", "dense2", "lm_head"],
                               device=device)
@@ -190,13 +184,18 @@ def test_lora_model_manager(dist_init, dummy_model, device):

     assert manager.device == device
     assert manager.punica_wrapper.device == device
+    assert hasattr(manager, "supported_lora_modules")
+    assert sorted(manager.supported_lora_modules) == [
+        "dense1",
+        "dense2",
+        "lm_head",
+        "output",
+    ]


 @pytest.mark.parametrize("device", DEVICES)
 def test_lora_lru_cache_model_manager(dist_init, dummy_model, device):
     model = dummy_model
-    model.supported_lora_modules = ["dense1", "dense2", "lm_head"]
-    model.packed_modules_mapping = {}
     model_lora1 = create_lora(1,
                               model, ["layer1.dense1", "dense2", "lm_head"],
                               device=device)
@@ -289,8 +288,6 @@ def test_lru_lora_model_manager(dist_init, dummy_model, device):
     # This tests just the LRU cache functionality, everything else is
     # tested in test_lora_model_manager
     model = dummy_model
-    model.supported_lora_modules = ["dense1", "dense2", "lm_head"]
-    model.packed_modules_mapping = {}
     model_lora1 = create_lora(1,
                               model, ["layer1.dense1", "dense2", "lm_head"],
                               device=device)
@@ -572,13 +569,6 @@ def test_worker_adapter_manager(llama_2_7b_model_extra_embeddings,
 @pytest.mark.parametrize("device", DEVICES)
 def test_packed_loras(dist_init, dummy_model_gate_up, device):
     model = dummy_model_gate_up
-    model.supported_lora_modules = ["gate_up_proj"]
-    model.packed_modules_mapping = {
-        "gate_up_proj": [
-            "gate_proj",
-            "up_proj",
-        ],
-    }
     model_lora = create_packed_lora(
         1,
         model,
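Illustration only, not part of the diff above: a minimal sketch of the pattern the updated tests exercise, constructing a manager and reading the supported_lora_modules it derives from the wrapped model rather than from attributes set on the model by hand. The import paths and the dummy_model fixture are assumptions taken from this test file's context.

# Sketch only: assumes LoRAConfig and LoRAModelManager live at these paths and
# that `dummy_model` is the fixture used by the tests in this file.
import torch

from vllm.config import LoRAConfig
from vllm.lora.models import LoRAModelManager


def sketch_supported_modules(dummy_model, device="cuda:0"):
    # The manager derives which modules are LoRA-capable from the model itself,
    # so the fixture no longer needs supported_lora_modules or
    # packed_modules_mapping assigned manually.
    manager = LoRAModelManager(
        dummy_model, 1, 1, 1,
        LoRAConfig(max_lora_rank=8, max_cpu_loras=8, max_loras=8),
        torch.device(device))
    return sorted(manager.supported_lora_modules)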