Commit 48e925f

[Misc] Clean up test docstrings and names (#17521)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
1 parent 1903c0b commit 48e925f

19 files changed: 51 additions & 115 deletions

.buildkite/test-pipeline.yaml

Lines changed: 8 additions & 2 deletions
@@ -395,10 +395,8 @@ steps:
   - csrc/
   - vllm/model_executor/layers/quantization
   - tests/quantization
-  - tests/models/quantization
   commands:
   - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization
-  - pytest -v -s models/quantization
 
 - label: LM Eval Small Models # 53min
   working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
@@ -509,6 +507,14 @@ steps:
   - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
   - pytest -v -s models/multimodal/generation/test_common.py -m 'split(group=1) and not core_model'
 
+- label: Quantized Models Test
+  #mirror_hardwares: [amd]
+  source_file_dependencies:
+  - vllm/model_executor/layers/quantization
+  - tests/models/quantization
+  commands:
+  - pytest -v -s models/quantization
+
 # This test is used only in PR development phase to test individual models and should never run on main
 - label: Custom Models Test
   mirror_hardwares: [amd]
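
The new step relocates the quantized-model tests that the first hunk removes from the Quantization Test step. A rough local equivalent of the step's command, as a sketch only: it assumes the relative path is resolved from the repository's tests/ directory, which is how the step's `pytest -v -s models/quantization` appears to run in CI.

import subprocess

# Run the same pytest invocation as the "Quantized Models Test" step.
# cwd="tests" is a local assumption; CI supplies its own working directory.
subprocess.run(["pytest", "-v", "-s", "models/quantization"],
               cwd="tests",
               check=True)  # raise if the suite fails, like a red CI step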

tests/models/language/generation/test_models.py renamed to tests/models/language/generation/test_common.py

Lines changed: 0 additions & 5 deletions
@@ -1,9 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the outputs of HF and vLLM when using greedy sampling.
-
-Run `pytest tests/models/test_models.py`.
-"""
-
 import pytest
 import torch
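
The deleted docstring pointed at `tests/models/test_models.py`, a pre-reorganization path, while the file actually lives under `tests/models/language/generation/`; the same stale "Run `pytest ...`" hint is removed from the files below as well. If such a hint were wanted at all, a sketch like the following (illustrative only, not part of the commit) would keep it accurate across renames by deriving the path from the module itself:

import pathlib

def run_hint() -> str:
    # Build the "how to run me" string from the module's real location,
    # so it cannot go stale when the file is moved or renamed.
    return f"Run `pytest {pathlib.Path(__file__).as_posix()}`."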

tests/models/language/generation/test_granite.py

Lines changed: 0 additions & 4 deletions
@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the outputs of HF and vLLM for Granite models using greedy sampling.
-
-Run `pytest tests/models/test_granite.py`.
-"""
 import pytest
 
 from ...utils import check_logprobs_close

tests/models/language/generation/test_mistral.py

Lines changed: 0 additions & 4 deletions
@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
-
-Run `pytest tests/models/test_mistral.py`.
-"""
 import copy
 import json

tests/models/language/generation/test_phimoe.py

Lines changed: 0 additions & 4 deletions
@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the outputs of HF and vLLM for moe models using greedy sampling.
-
-Run `pytest tests/models/test_phimoe.py`.
-"""
 import pytest
 import torch

tests/models/language/pooling/test_cls_models.py renamed to tests/models/language/pooling/test_classification.py

Lines changed: 1 addition & 5 deletions
@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the classification outputs of HF and vLLM models.
-
-Run `pytest tests/models/test_cls_models.py`.
-"""
 import pytest
 import torch
 from transformers import AutoModelForSequenceClassification
@@ -19,7 +15,7 @@
 )
 @pytest.mark.parametrize("dtype",
                          ["half"] if current_platform.is_rocm() else ["float"])
-def test_classification_models(
+def test_models(
     hf_runner,
     vllm_runner,
     example_prompts,
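
After the rename, `test_models` is no longer unique across the suite, so selecting this test relies on pytest's node IDs, which include the file path. A minimal sketch of selecting the renamed test programmatically (the node ID below is inferred from this diff):

import pytest

# Node IDs are "<file path>::<function name>", so the generic name stays
# unambiguous as long as the file path is given.
pytest.main([
    "-v",
    "tests/models/language/pooling/test_classification.py::test_models",
])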

tests/models/language/pooling/test_embedding.py

Lines changed: 0 additions & 4 deletions
@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the embedding outputs of HF and vLLM models.
-
-Run `pytest tests/models/embedding/language/test_embedding.py`.
-"""
 import pytest
 
 from vllm.config import PoolerConfig

tests/models/language/pooling/test_jina.py

Lines changed: 3 additions & 8 deletions
@@ -1,9 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-# ruff: noqa: E501
-"""Compare the scoring outputs of HF and vLLM models.
-
-Run `pytest tests/models/embedding/language/test_jina.py`.
-"""
 import math
 
 import pytest
@@ -22,9 +17,9 @@
     "Organic skincare for sensitive skin with aloe vera and chamomile.",
     "New makeup trends focus on bold colors and innovative techniques",
     "Bio-Hautpflege für empfindliche Haut mit Aloe Vera und Kamille",
-    "Neue Make-up-Trends setzen auf kräftige Farben und innovative Techniken",
-    "Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla",
-    "Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras",
+    "Neue Make-up-Trends setzen auf kräftige Farben und innovative Techniken",  # noqa: E501
+    "Cuidado de la piel orgánico para piel sensible con aloe vera y manzanilla",  # noqa: E501
+    "Las nuevas tendencias de maquillaje se centran en colores vivos y técnicas innovadoras",  # noqa: E501
     "针对敏感肌专门设计的天然有机护肤产品",
     "新的化妆趋势注重鲜艳的颜色和创新的技巧",
     "敏感肌のために特別に設計された天然有機スキンケア製品",
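
This change narrows the lint suppression: the deleted file-wide `# ruff: noqa: E501` exempted every line in the module from the line-length rule, while the trailing `# noqa: E501` comments exempt only the three long test strings. A minimal sketch of the difference (hypothetical strings):

# With no file-wide suppression, ordinary lines stay subject to E501.
CHECKED = "a normal-length string, still linted"
EXEMPT = "a deliberately long string that opts out of the length check on this one line only"  # noqa: E501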

tests/models/language/pooling/test_scoring.py

Lines changed: 22 additions & 39 deletions
@@ -1,15 +1,11 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the scoring outputs of HF and vLLM models.
-
-Run `pytest tests/models/embedding/language/test_scoring.py`.
-"""
 import math
 
 import pytest
 import torch
 import torch.nn.functional as F
 
-MODELS = [
+CROSS_ENCODER_MODELS = [
     "cross-encoder/ms-marco-MiniLM-L-6-v2",  # Bert
     "BAAI/bge-reranker-v2-m3",  # Roberta
 ]
@@ -28,21 +24,21 @@
     "The capital of Germany is Berlin.",
 ]
 
+DTYPE = "half"
+
 
-@pytest.fixture(scope="module", params=MODELS)
+@pytest.fixture(scope="module", params=CROSS_ENCODER_MODELS)
 def model_name(request):
     yield request.param
 
 
-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_1_to_1(vllm_runner, hf_runner, model_name, dtype: str):
-
+def test_cross_encoder_1_to_1(vllm_runner, hf_runner, model_name):
     text_pair = [TEXTS_1[0], TEXTS_2[0]]
 
-    with hf_runner(model_name, dtype=dtype, is_cross_encoder=True) as hf_model:
+    with hf_runner(model_name, dtype=DTYPE, is_cross_encoder=True) as hf_model:
         hf_outputs = hf_model.predict([text_pair]).tolist()
 
-    with vllm_runner(model_name, task="score", dtype=dtype,
+    with vllm_runner(model_name, task="score", dtype=DTYPE,
                      max_model_len=None) as vllm_model:
         vllm_outputs = vllm_model.score(text_pair[0], text_pair[1])
 
@@ -52,18 +48,16 @@ def test_llm_1_to_1(vllm_runner, hf_runner, model_name, dtype: str):
     assert math.isclose(hf_outputs[0], vllm_outputs[0], rel_tol=0.01)
 
 
-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_1_to_N(vllm_runner, hf_runner, model_name, dtype: str):
-
+def test_cross_encoder_1_to_N(vllm_runner, hf_runner, model_name):
     text_pairs = [
         [TEXTS_1[0], TEXTS_2[0]],
         [TEXTS_1[0], TEXTS_2[1]],
     ]
 
-    with hf_runner(model_name, dtype=dtype, is_cross_encoder=True) as hf_model:
+    with hf_runner(model_name, dtype=DTYPE, is_cross_encoder=True) as hf_model:
         hf_outputs = hf_model.predict(text_pairs).tolist()
 
-    with vllm_runner(model_name, task="score", dtype=dtype,
+    with vllm_runner(model_name, task="score", dtype=DTYPE,
                      max_model_len=None) as vllm_model:
         vllm_outputs = vllm_model.score(TEXTS_1[0], TEXTS_2)
 
@@ -74,18 +68,16 @@ def test_llm_1_to_N(vllm_runner, hf_runner, model_name, dtype: str):
     assert math.isclose(hf_outputs[1], vllm_outputs[1], rel_tol=0.01)
 
 
-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_N_to_N(vllm_runner, hf_runner, model_name, dtype: str):
-
+def test_cross_encoder_N_to_N(vllm_runner, hf_runner, model_name):
     text_pairs = [
         [TEXTS_1[0], TEXTS_2[0]],
         [TEXTS_1[1], TEXTS_2[1]],
     ]
 
-    with hf_runner(model_name, dtype=dtype, is_cross_encoder=True) as hf_model:
+    with hf_runner(model_name, dtype=DTYPE, is_cross_encoder=True) as hf_model:
         hf_outputs = hf_model.predict(text_pairs).tolist()
 
-    with vllm_runner(model_name, task="score", dtype=dtype,
+    with vllm_runner(model_name, task="score", dtype=DTYPE,
                      max_model_len=None) as vllm_model:
         vllm_outputs = vllm_model.score(TEXTS_1, TEXTS_2)
 
@@ -101,13 +93,10 @@ def emb_model_name(request):
     yield request.param
 
 
-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name,
-                              dtype: str):
-
+def test_embedding_1_to_1(vllm_runner, hf_runner, emb_model_name):
     text_pair = [TEXTS_1[0], TEXTS_2[0]]
 
-    with hf_runner(emb_model_name, dtype=dtype,
+    with hf_runner(emb_model_name, dtype=DTYPE,
                    is_sentence_transformer=True) as hf_model:
         hf_embeddings = hf_model.encode(text_pair)
         hf_outputs = [
@@ -116,7 +105,7 @@ def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name,
 
     with vllm_runner(emb_model_name,
                      task="embed",
-                     dtype=dtype,
+                     dtype=DTYPE,
                      max_model_len=None) as vllm_model:
         vllm_outputs = vllm_model.score(text_pair[0], text_pair[1])
 
@@ -126,16 +115,13 @@ def test_llm_1_to_1_embedding(vllm_runner, hf_runner, emb_model_name,
     assert math.isclose(hf_outputs[0], vllm_outputs[0], rel_tol=0.01)
 
 
-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
-                              dtype: str):
-
+def test_embedding_1_to_N(vllm_runner, hf_runner, emb_model_name):
     text_pairs = [
         [TEXTS_1[0], TEXTS_2[0]],
         [TEXTS_1[0], TEXTS_2[1]],
     ]
 
-    with hf_runner(emb_model_name, dtype=dtype,
+    with hf_runner(emb_model_name, dtype=DTYPE,
                    is_sentence_transformer=True) as hf_model:
         hf_embeddings = [
             hf_model.encode(text_pair) for text_pair in text_pairs
@@ -147,7 +133,7 @@ def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
 
     with vllm_runner(emb_model_name,
                      task="embed",
-                     dtype=dtype,
+                     dtype=DTYPE,
                      max_model_len=None) as vllm_model:
         vllm_outputs = vllm_model.score(TEXTS_1[0], TEXTS_2)
 
@@ -158,16 +144,13 @@ def test_llm_1_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
     assert math.isclose(hf_outputs[1], vllm_outputs[1], rel_tol=0.01)
 
 
-@pytest.mark.parametrize("dtype", ["half"])
-def test_llm_N_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
-                              dtype: str):
-
+def test_embedding_N_to_N(vllm_runner, hf_runner, emb_model_name):
    text_pairs = [
         [TEXTS_1[0], TEXTS_2[0]],
         [TEXTS_1[1], TEXTS_2[1]],
     ]
 
-    with hf_runner(emb_model_name, dtype=dtype,
+    with hf_runner(emb_model_name, dtype=DTYPE,
                    is_sentence_transformer=True) as hf_model:
         hf_embeddings = [
             hf_model.encode(text_pair) for text_pair in text_pairs
@@ -179,7 +162,7 @@ def test_llm_N_to_N_embedding(vllm_runner, hf_runner, emb_model_name,
 
     with vllm_runner(emb_model_name,
                      task="embed",
-                     dtype=dtype,
+                     dtype=DTYPE,
                      max_model_len=None) as vllm_model:
         vllm_outputs = vllm_model.score(TEXTS_1, TEXTS_2)
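
Two renames happen here at once: the `test_llm_*` functions become `test_cross_encoder_*` and `test_embedding_*` to say what they actually compare, and the single-valued `@pytest.mark.parametrize("dtype", ["half"])` is replaced by a module constant, since a one-element parametrization only appended "[half]" to every test ID without adding coverage. The pattern in miniature (model names below are hypothetical):

import pytest

DTYPE = "half"  # fixed for the whole module; no parametrization axis needed

@pytest.fixture(scope="module", params=["model-a", "model-b"])  # hypothetical
def model_name(request):
    yield request.param

def test_sketch(model_name):
    # Stands in for the real HF-vs-vLLM score comparison in test_scoring.py.
    assert DTYPE == "half"
    assert isinstance(model_name, str)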

tests/models/language/pooling/test_snowflake_arctic_embed.py

Lines changed: 0 additions & 4 deletions
@@ -1,8 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-"""Compare the embedding outputs of HF and vLLM models.
-
-Run `pytest tests/models/embedding/language/test_snowflake_arctic_embed.py`.
-"""
 import pytest
 
 from ...utils import EmbedModelInfo, check_embeddings_close
