Commit e238617

jeejeelee authored and lk-chen committed
[CI/Build] Further clean up LoRA tests (vllm-project#15920)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
1 parent df44cce commit e238617

File tree

6 files changed (+9, -46 lines)


.buildkite/test-pipeline.yaml

Lines changed: 1 addition & 3 deletions
@@ -289,7 +289,7 @@ steps:
   source_file_dependencies:
   - vllm/lora
   - tests/lora
-  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_minicpmv_tp.py --ignore=lora/test_transfomers_model.py
+  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py
   parallelism: 4
 
 - label: PyTorch Fullgraph Smoke Test # 9min

@@ -602,8 +602,6 @@ steps:
     # requires multi-GPU testing for validation.
     - pytest -v -s -x lora/test_chatglm3_tp.py
     - pytest -v -s -x lora/test_llama_tp.py
-    - pytest -v -s -x lora/test_minicpmv_tp.py
-    - pytest -v -s -x lora/test_transfomers_model.py
 
 
 - label: Weight Loading Multiple GPU Test # 33min

tests/lora/conftest.py

Lines changed: 0 additions & 23 deletions
@@ -2,7 +2,6 @@
 
 import tempfile
 from collections import OrderedDict
-from typing import TypedDict
 from unittest.mock import MagicMock, patch
 
 import pytest

@@ -26,28 +25,6 @@
 from vllm.platforms import current_platform
 
 
-class ContextIDInfo(TypedDict):
-    lora_id: int
-    context_length: str
-
-
-class ContextInfo(TypedDict):
-    lora: str
-    context_length: str
-
-
-LONG_LORA_INFOS: list[ContextIDInfo] = [{
-    "lora_id": 1,
-    "context_length": "16k",
-}, {
-    "lora_id": 2,
-    "context_length": "16k",
-}, {
-    "lora_id": 3,
-    "context_length": "32k",
-}]
-
-
 @pytest.fixture()
 def should_do_global_cleanup_after_test(request) -> bool:
     """Allow subdirectories to skip global cleanup by overriding this fixture.

tests/lora/test_layers.py

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@
 # prefill stage(True) or decode stage(False)
 STAGES = [True, False]
 
-NUM_RANDOM_SEEDS = 10
+NUM_RANDOM_SEEDS = 6
 
 VOCAB_PARALLEL_EMBEDDING_TEST_NUM_RANDOM_SEEDS = 128

tests/lora/test_llama_tp.py

Lines changed: 0 additions & 17 deletions
@@ -153,20 +153,3 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
         enable_chunked_prefill=True,
     )
     generate_and_test(llm, sql_lora_files)
-
-
-@multi_gpu_test(num_gpus=4)
-@create_new_process_for_each_test()
-def test_llama_lora_tp4_fully_sharded_enable_bias(sql_lora_files):
-
-    llm = vllm.LLM(
-        MODEL_PATH,
-        enable_lora=True,
-        max_num_seqs=16,
-        max_loras=4,
-        tensor_parallel_size=4,
-        fully_sharded_loras=True,
-        enable_lora_bias=True,
-        enable_chunked_prefill=True,
-    )
-    generate_and_test(llm, sql_lora_files)
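
With the enable_bias variant removed, the only 4-GPU fully sharded case left in this file is test_llama_lora_tp4_fully_sharded_loras (named in the hunk header above). A rough sketch of its shape, reconstructed from the context lines; the exact argument list is an assumption based on the deleted sibling test, not the verbatim file:

    @multi_gpu_test(num_gpus=4)
    @create_new_process_for_each_test()
    def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
        # Assumed configuration: same as the removed enable_bias test,
        # minus enable_lora_bias=True.
        llm = vllm.LLM(
            MODEL_PATH,
            enable_lora=True,
            max_num_seqs=16,
            max_loras=4,
            tensor_parallel_size=4,
            fully_sharded_loras=True,
            enable_chunked_prefill=True,
        )
        generate_and_test(llm, sql_lora_files)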

tests/lora/test_minicpmv_tp.py

Lines changed: 0 additions & 1 deletion
@@ -58,7 +58,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
 @pytest.mark.xfail(
     current_platform.is_rocm(),
     reason="MiniCPM-V dependency xformers incompatible with ROCm")
-@create_new_process_for_each_test()
 def test_minicpmv_lora(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,

tests/lora/test_transfomers_model.py

Lines changed: 7 additions & 1 deletion
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
+import pytest
+
 import vllm
 from vllm.lora.request import LoRARequest
+from vllm.platforms import current_platform
 
 from ..utils import create_new_process_for_each_test, multi_gpu_test
 

@@ -44,7 +47,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
     return generated_texts
 
 
-@create_new_process_for_each_test()
 def test_ilama_lora(ilama_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,

@@ -63,6 +65,8 @@ def test_ilama_lora(ilama_lora_files):
         assert output2[i] == EXPECTED_LORA_OUTPUT[i]
 
 
+@pytest.mark.skipif(current_platform.is_cuda_alike(),
+                    reason="Skipping to avoid redundant model tests")
 @multi_gpu_test(num_gpus=4)
 @create_new_process_for_each_test()
 def test_ilama_lora_tp4(ilama_lora_files):

@@ -84,6 +88,8 @@ def test_ilama_lora_tp4(ilama_lora_files):
         assert output2[i] == EXPECTED_LORA_OUTPUT[i]
 
 
+@pytest.mark.skipif(current_platform.is_cuda_alike(),
+                    reason="Skipping to avoid redundant model tests")
 @multi_gpu_test(num_gpus=4)
 @create_new_process_for_each_test()
 def test_ilama_lora_tp4_fully_sharded_loras(ilama_lora_files):
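
The gating added in this file is the standard pytest.mark.skipif pattern driven by vLLM's current_platform interface. A minimal sketch of how the decorator stack reads on one of the TP4 tests; the body below is hypothetical, and only the two skipif lines are what this commit introduces:

    import pytest

    import vllm
    from vllm.platforms import current_platform

    from ..utils import create_new_process_for_each_test, multi_gpu_test


    @pytest.mark.skipif(current_platform.is_cuda_alike(),
                        reason="Skipping to avoid redundant model tests")
    @multi_gpu_test(num_gpus=4)
    @create_new_process_for_each_test()
    def test_ilama_lora_tp4(ilama_lora_files):
        # Skipped on CUDA-like platforms to avoid redundant multi-GPU runs;
        # other platforms still exercise the tensor-parallel path.
        llm = vllm.LLM(MODEL_PATH,  # MODEL_PATH as defined in the test module
                       max_model_len=1024,
                       enable_lora=True,  # assumed LoRA flags for this sketch
                       tensor_parallel_size=4)
        ...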
