4 changes: 1 addition & 3 deletions .buildkite/test-pipeline.yaml
@@ -289,7 +289,7 @@ steps:
source_file_dependencies:
- vllm/lora
- tests/lora
command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_minicpmv_tp.py --ignore=lora/test_transfomers_model.py
command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py
parallelism: 4

- label: PyTorch Fullgraph Smoke Test # 9min
@@ -602,8 +602,6 @@ steps:
# requires multi-GPU testing for validation.
- pytest -v -s -x lora/test_chatglm3_tp.py
- pytest -v -s -x lora/test_llama_tp.py
- pytest -v -s -x lora/test_minicpmv_tp.py
- pytest -v -s -x lora/test_transfomers_model.py


- label: Weight Loading Multiple GPU Test # 33min
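For context on the pipeline step above: the LoRA suite is fanned out across four parallel Buildkite jobs via the --shard-id/--num-shards flags (pytest-shard style), while the --ignore entries keep the listed files out of this sharded run. Below is a minimal, illustrative sketch of the hash-modulo assignment such sharding typically performs; it is not pytest-shard's or vLLM's actual implementation, and the test ids are hypothetical placeholders.

# Illustrative only: deterministic hash-modulo assignment of collected tests
# to shards, the general idea behind --shard-id/--num-shards splitting.
from zlib import crc32

def shard_tests(test_ids: list[str], shard_id: int, num_shards: int) -> list[str]:
    # Keep a test only when its stable hash lands in this shard's bucket.
    return [t for t in test_ids if crc32(t.encode()) % num_shards == shard_id]

# Hypothetical test ids, split as Buildkite job 0 of 4 would see them.
tests = ["lora/test_layers.py::test_embeddings", "lora/test_quant_model.py::test_load"]
print(shard_tests(tests, shard_id=0, num_shards=4))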
23 changes: 0 additions & 23 deletions tests/lora/conftest.py
@@ -2,7 +2,6 @@

import tempfile
from collections import OrderedDict
from typing import TypedDict
from unittest.mock import MagicMock, patch

import pytest
@@ -26,28 +25,6 @@
from vllm.platforms import current_platform


class ContextIDInfo(TypedDict):
lora_id: int
context_length: str


class ContextInfo(TypedDict):
lora: str
context_length: str


LONG_LORA_INFOS: list[ContextIDInfo] = [{
"lora_id": 1,
"context_length": "16k",
}, {
"lora_id": 2,
"context_length": "16k",
}, {
"lora_id": 3,
"context_length": "32k",
}]


@pytest.fixture()
def should_do_global_cleanup_after_test(request) -> bool:
"""Allow subdirectories to skip global cleanup by overriding this fixture.
2 changes: 1 addition & 1 deletion tests/lora/test_layers.py
@@ -59,7 +59,7 @@
# prefill stage(True) or decode stage(False)
STAGES = [True, False]

NUM_RANDOM_SEEDS = 10
NUM_RANDOM_SEEDS = 6

VOCAB_PARALLEL_EMBEDDING_TEST_NUM_RANDOM_SEEDS = 128

17 changes: 0 additions & 17 deletions tests/lora/test_llama_tp.py
@@ -153,20 +153,3 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
enable_chunked_prefill=True,
)
generate_and_test(llm, sql_lora_files)


@multi_gpu_test(num_gpus=4)
@create_new_process_for_each_test()
def test_llama_lora_tp4_fully_sharded_enable_bias(sql_lora_files):

llm = vllm.LLM(
MODEL_PATH,
enable_lora=True,
max_num_seqs=16,
max_loras=4,
tensor_parallel_size=4,
fully_sharded_loras=True,
enable_lora_bias=True,
enable_chunked_prefill=True,
)
generate_and_test(llm, sql_lora_files)
1 change: 0 additions & 1 deletion tests/lora/test_minicpmv_tp.py
@@ -58,7 +58,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
@pytest.mark.xfail(
current_platform.is_rocm(),
reason="MiniCPM-V dependency xformers incompatible with ROCm")
@create_new_process_for_each_test()
def test_minicpmv_lora(minicpmv_lora_files):
llm = vllm.LLM(
MODEL_PATH,
8 changes: 7 additions & 1 deletion tests/lora/test_transfomers_model.py
@@ -1,7 +1,10 @@
# SPDX-License-Identifier: Apache-2.0

import pytest

import vllm
from vllm.lora.request import LoRARequest
from vllm.platforms import current_platform

from ..utils import create_new_process_for_each_test, multi_gpu_test

@@ -44,7 +47,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
return generated_texts


@create_new_process_for_each_test()
def test_ilama_lora(ilama_lora_files):
llm = vllm.LLM(MODEL_PATH,
max_model_len=1024,
@@ -63,6 +65,8 @@ def test_ilama_lora(ilama_lora_files):
assert output2[i] == EXPECTED_LORA_OUTPUT[i]


@pytest.mark.skipif(current_platform.is_cuda_alike(),
reason="Skipping to avoid redundant model tests")
@multi_gpu_test(num_gpus=4)
@create_new_process_for_each_test()
def test_ilama_lora_tp4(ilama_lora_files):
@@ -84,6 +88,8 @@ def test_ilama_lora_tp4(ilama_lora_files):
assert output2[i] == EXPECTED_LORA_OUTPUT[i]


@pytest.mark.skipif(current_platform.is_cuda_alike(),
reason="Skipping to avoid redundant model tests")
@multi_gpu_test(num_gpus=4)
@create_new_process_for_each_test()
def test_ilama_lora_tp4_fully_sharded_loras(ilama_lora_files):
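The gates added above use the standard pytest.mark.skipif pattern: the platform predicate is evaluated once at collection time, so the TP4 variants are skipped wholesale on CUDA-alike platforms instead of failing mid-run. A minimal self-contained sketch of the same pattern, assuming only the vllm.platforms interface already shown in the diff (the test body is a placeholder):

# Minimal sketch of the skip gate used in the tests above; placeholder body.
import pytest

from vllm.platforms import current_platform

@pytest.mark.skipif(current_platform.is_cuda_alike(),
                    reason="Skipping to avoid redundant model tests")
def test_placeholder_tp4():
    # Runs only on platforms that are not CUDA-alike.
    assert True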