
Commit 1469d79

[CI/UT][Refactor] move e2e spec decode and deepseek acc test to per pr
Signed-off-by: MengqingCao <cmq0113@163.com>
Parent: ebb2a70

6 files changed, +45 -40 lines changed

.github/workflows/vllm_ascend_test.yaml (4 additions, 0 deletions)

@@ -239,6 +239,10 @@ jobs:
           --ignore=tests/e2e/singlecard/test_ilama_lora.py \
           --ignore=tests/e2e/singlecard/test_guided_decoding.py \
           --ignore=tests/e2e/singlecard/test_camem.py
+          # ------------ spec decode e2e test on v1 ------------ #
+          VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
+          # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
+          # VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_v1_spec_decode.py

       - name: Run e2e test on V0 engine
         if: ${{ github.event_name == 'schedule' }}

.github/workflows/vllm_ascend_test_long_term.yaml (8 additions, 12 deletions)

@@ -42,7 +42,10 @@ jobs:
     strategy:
       max-parallel: 2
       matrix:
-        os: [linux-arm64-npu-1, linux-arm64-npu-4]
+        os: [linux-arm64-npu-1,
+             # revert me if requires multi-card test
+             # linux-arm64-npu-4
+             ]
         vllm_version: [main, v0.9.1]
     name: vLLM Ascend long term test
     runs-on: ${{ matrix.os }}

@@ -96,14 +99,7 @@ jobs:

       - name: Run vllm-project/vllm-ascend long term test
         run: |
-          if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
-            # spec decode test
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
-            # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
-            # VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_v1_spec_decode.py
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
-            pytest -sv tests/e2e/long_term/spec_decode --ignore=tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/e2e/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/e2e/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
-            pytest -sv tests/e2e/long_term/test_accuracy.py
-          else
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/test_deepseek_v2_lite_tp2_accuracy.py
-          fi
+          # ------------ spec decode test ------------ #
+          VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
+          pytest -sv tests/e2e/long_term/spec_decode --ignore=tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/e2e/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/e2e/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
+          pytest -sv tests/e2e/long_term/test_accuracy.py

tests/e2e/long_term/test_deepseek_v2_lite_tp2_accuracy.py renamed to tests/e2e/multicard/test_deepseek_v2_lite_tp2_accuracy.py (1 addition, 1 deletion)

@@ -38,7 +38,7 @@


 def run_test(model_name, queue, more_args=None):
-    model_args = f"pretrained={model_name},max_model_len=4096,trust_remote_code=True,tensor_parallel_size=4"
+    model_args = f"pretrained={model_name},max_model_len=4096,trust_remote_code=True,tensor_parallel_size=4,enforce_eager=True"
     if more_args is not None:
         model_args = f"{model_args},{more_args}"
     results = lm_eval.simple_evaluate(
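For context, a self-contained sketch of how an accuracy check like this one can be driven end to end. Only the model_args string comes from the diff above; the lm-eval backend name, the task choice, and the queue plumbing are illustrative assumptions (the spawn-per-run pattern matches the "it needs a clean process" comment in the workflow above).

import lm_eval
from multiprocessing import get_context

def run_test(model_name, queue, more_args=None):
    model_args = (f"pretrained={model_name},max_model_len=4096,"
                  "trust_remote_code=True,tensor_parallel_size=4,"
                  "enforce_eager=True")
    if more_args is not None:
        model_args = f"{model_args},{more_args}"
    results = lm_eval.simple_evaluate(
        model="vllm",      # assumption: lm-eval's vLLM backend
        model_args=model_args,
        tasks=["gsm8k"],   # hypothetical task; the real test may use another
    )
    queue.put(results["results"])

def run_in_clean_process(model_name):
    # Spawn a fresh interpreter so accelerator state left over from a
    # previous test cannot skew the accuracy measurement.
    ctx = get_context("spawn")
    queue = ctx.Queue()
    proc = ctx.Process(target=run_test, args=(model_name, queue))
    proc.start()
    results = queue.get()
    proc.join()
    return results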

tests/e2e/multicard/test_offline_inference_distributed.py (32 additions, 27 deletions)

@@ -25,8 +25,10 @@

 from modelscope import snapshot_download  # type: ignore
 from vllm import SamplingParams
+import pytest

 from tests.conftest import VllmRunner
+from tests.model_utils import check_outputs_equal

 os.environ["PYTORCH_NPU_ALLOC_CONF"] = "max_split_size_mb:256"

@@ -46,21 +48,6 @@ def test_models_distributed_QwQ():
         vllm_model.generate_greedy(example_prompts, max_tokens)


-def test_models_distributed_DeepSeek():
-    example_prompts = [
-        "Hello, my name is",
-    ]
-    dtype = "half"
-    max_tokens = 5
-    with VllmRunner(
-            "deepseek-ai/DeepSeek-V2-Lite",
-            dtype=dtype,
-            tensor_parallel_size=4,
-            distributed_executor_backend="mp",
-    ) as vllm_model:
-        vllm_model.generate_greedy(example_prompts, max_tokens)
-
-
 @patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE": "1"})
 def test_models_distributed_topk() -> None:
     example_prompts = [

@@ -83,18 +70,36 @@ def test_models_distributed_topk() -> None:
         vllm_model.generate(example_prompts, sampling_params)


-@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_DBO": "1"})
-def test_models_distributed_DeepSeek_dbo():
-    example_prompts = ["The president of the United States is"] * 41
-    dtype = "half"
-    sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
-    with VllmRunner(
-            "deepseek-ai/DeepSeek-V2-Lite",
-            dtype=dtype,
-            tensor_parallel_size=4,
-            distributed_executor_backend="mp",
-    ) as vllm_model:
-        vllm_model.generate(example_prompts, sampling_params)
+def test_models_distributed_DeepSeek_dbo(monkeypatch: pytest.MonkeyPatch):
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_ASCEND_ENABLE_DBO", "1")
+
+        example_prompts = ["The president of the United States is"] * 41
+        dtype = "half"
+        sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
+        with VllmRunner(
+                "deepseek-ai/DeepSeek-V2-Lite",
+                dtype=dtype,
+                tensor_parallel_size=4,
+                distributed_executor_backend="mp",
+        ) as vllm_model:
+            dpo_output = vllm_model.generate(example_prompts, sampling_params)
+
+        m.setenv("VLLM_ASCEND_ENABLE_DBO", "0")
+        with VllmRunner(
+                "deepseek-ai/DeepSeek-V2-Lite",
+                dtype=dtype,
+                tensor_parallel_size=4,
+                distributed_executor_backend="mp",
+        ) as vllm_model:
+            output = vllm_model.generate(example_prompts, sampling_params)
+
+        check_outputs_equal(
+            outputs_0_lst=output,
+            outputs_1_lst=dpo_output,
+            name_0="vllm_outputs",
+            name_1="vllm_dbo_outputs",
+        )


 def test_models_distributed_DeepSeek_W8A8():
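The refactored DBO test now compares a DBO-enabled run against a plain run under greedy sampling instead of only checking that generation completes. A rough sketch of what the check_outputs_equal helper imported above is assumed to do, modeled on vLLM's tests/model_utils; the (token_ids, text) tuple layout is an assumption:

from typing import Sequence, Tuple

TokensText = Tuple[Sequence[int], str]

def check_outputs_equal(*, outputs_0_lst: Sequence[TokensText],
                        outputs_1_lst: Sequence[TokensText],
                        name_0: str, name_1: str) -> None:
    # With temperature=0.0 both runs are greedy, so DBO on/off should be
    # token-for-token identical; any divergence signals a correctness bug.
    assert len(outputs_0_lst) == len(outputs_1_lst)
    for prompt_idx, (outputs_0, outputs_1) in enumerate(
            zip(outputs_0_lst, outputs_1_lst)):
        output_ids_0, output_str_0 = outputs_0
        output_ids_1, output_str_1 = outputs_1
        assert output_str_0 == output_str_1, (
            f"prompt {prompt_idx}: {name_0} != {name_1}")
        assert output_ids_0 == output_ids_1, (
            f"prompt {prompt_idx}: token ids differ ({name_0} vs {name_1})")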
