19 | 19 | from vllm.lora.request import LoRARequest |
20 | 20 | from vllm.lora.worker_manager import (LRUCacheWorkerLoRAManager, |
21 | 21 | WorkerLoRAManager) |
22 | | -from vllm.model_executor.layers.linear import RowParallelLinear |
23 | 22 | from vllm.platforms import current_platform |
24 | 23 |
25 | 24 | EMBEDDING_MODULES = { |
@@ -114,27 +113,28 @@ def create_packed_lora( |
114 | 113 |
115 | 114 | def test_replace_submodules(dist_init, dummy_model): |
116 | 115 | model = dummy_model |
117 | | - model.supported_lora_modules = ["dense1", "layer1.dense2"] |
118 | 116 | model.packed_modules_mapping = {} |
119 | 117 | manager = LoRAModelManager( |
120 | 118 | model, 1, 1, 1, |
121 | 119 | LoRAConfig(max_lora_rank=8, max_cpu_loras=8, max_loras=8), |
122 | 120 | torch.device(DEVICES[0])) |
123 | 121 | model = manager.model |
124 | | - |
125 | 122 | assert isinstance(model.get_submodule("dense1"), |
126 | 123 | ColumnParallelLinearWithLoRA) |
127 | 124 | assert isinstance(model.get_submodule("layer1.dense1"), |
128 | 125 | ColumnParallelLinearWithLoRA) |
129 | | - assert isinstance(model.get_submodule("dense2"), RowParallelLinear) |
| 126 | + assert isinstance(model.get_submodule("dense2"), RowParallelLinearWithLoRA) |
130 | 127 | assert isinstance(model.get_submodule("layer1.dense2"), |
131 | 128 | RowParallelLinearWithLoRA) |
132 | 129 |
133 | 130 |
134 | 131 | @pytest.mark.parametrize("device", DEVICES) |
135 | 132 | def test_lora_model_manager(dist_init, dummy_model, device): |
136 | 133 | model = dummy_model |
137 | | - model.supported_lora_modules = ["dense1", "dense2", "lm_head"] |
| 134 | + model.embedding_modules = {"lm_head": "lm_head"} |
| 135 | + model.packed_modules_mapping = {} |
| 136 | + model.embedding_modules = {} |
| 137 | + model.embedding_padding_modules = [] |
138 | 138 | model.packed_modules_mapping = {} |
139 | 139 | model_lora1 = create_lora(1, |
140 | 140 | model, ["layer1.dense1", "dense2", "lm_head"], |
@@ -195,7 +195,7 @@ def test_lora_model_manager(dist_init, dummy_model, device): |
195 | 195 | @pytest.mark.parametrize("device", DEVICES) |
196 | 196 | def test_lora_lru_cache_model_manager(dist_init, dummy_model, device): |
197 | 197 | model = dummy_model |
198 | | - model.supported_lora_modules = ["dense1", "dense2", "lm_head"] |
| 198 | + model.embedding_modules = {"lm_head": "lm_head"} |
199 | 199 | model.packed_modules_mapping = {} |
200 | 200 | model_lora1 = create_lora(1, |
201 | 201 | model, ["layer1.dense1", "dense2", "lm_head"], |
@@ -289,7 +289,7 @@ def test_lru_lora_model_manager(dist_init, dummy_model, device): |
289 | 289 | # This tests just the LRU cache functionality, everything else is |
290 | 290 | # tested in test_lora_model_manager |
291 | 291 | model = dummy_model |
292 | | - model.supported_lora_modules = ["dense1", "dense2", "lm_head"] |
| 292 | + model.embedding_modules = {"lm_head": "lm_head"} |
293 | 293 | model.packed_modules_mapping = {} |
294 | 294 | model_lora1 = create_lora(1, |
295 | 295 | model, ["layer1.dense1", "dense2", "lm_head"], |
@@ -572,7 +572,6 @@ def test_worker_adapter_manager(llama_2_7b_model_extra_embeddings, |
572 | 572 | @pytest.mark.parametrize("device", DEVICES) |
573 | 573 | def test_packed_loras(dist_init, dummy_model_gate_up, device): |
574 | 574 | model = dummy_model_gate_up |
575 | | - model.supported_lora_modules = ["gate_up_proj"] |
576 | 575 | model.packed_modules_mapping = { |
577 | 576 | "gate_up_proj": [ |
578 | 577 | "gate_proj", |
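For reference, below is a sketch of how the reworked `test_replace_submodules` reads after this change, reconstructed from the first hunk above rather than copied from the file. It assumes the `dist_init` and `dummy_model` pytest fixtures and the `DEVICES` constant defined elsewhere in this test module; the point of the hunk is that both `dense2` submodules are now wrapped with `RowParallelLinearWithLoRA` without the model declaring `supported_lora_modules`.

```python
# Sketch reconstructed from the hunk above (not the verbatim file contents).
# dist_init and dummy_model are pytest fixtures, and DEVICES is a constant
# assumed to be defined elsewhere in this test module.
import torch

from vllm.config import LoRAConfig
from vllm.lora.layers import (ColumnParallelLinearWithLoRA,
                              RowParallelLinearWithLoRA)
from vllm.lora.models import LoRAModelManager


def test_replace_submodules(dist_init, dummy_model):
    model = dummy_model
    model.packed_modules_mapping = {}
    manager = LoRAModelManager(
        model, 1, 1, 1,
        LoRAConfig(max_lora_rank=8, max_cpu_loras=8, max_loras=8),
        torch.device(DEVICES[0]))
    model = manager.model
    # Per this change, LoRA-capable layers are detected from their types
    # rather than from a per-model supported_lora_modules list, so the
    # dense2 submodules are wrapped as well.
    assert isinstance(model.get_submodule("dense1"),
                      ColumnParallelLinearWithLoRA)
    assert isinstance(model.get_submodule("layer1.dense1"),
                      ColumnParallelLinearWithLoRA)
    assert isinstance(model.get_submodule("dense2"),
                      RowParallelLinearWithLoRA)
    assert isinstance(model.get_submodule("layer1.dense2"),
                      RowParallelLinearWithLoRA)
```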