
Commit 860fbb4

[CI] Add unit test framework

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>

Parent: 966557a


57 files changed, +396 / -267 lines

.github/workflows/vllm_ascend_test.yaml

Lines changed: 91 additions & 47 deletions
@@ -15,7 +15,7 @@
 # This file is a part of the vllm-ascend project.
 #
 
-name: 'e2e test / basic'
+name: 'test'
 
 on:
   schedule:
@@ -114,6 +114,56 @@ jobs:
           echo "::add-matcher::.github/workflows/matchers/mypy.json"
           tools/mypy.sh 1 ${{ matrix.python-version }}
 
+  ut:
+    needs: [lint]
+    name: unit test
+    if: ${{ needs.lint.result == 'success' }}
+    runs-on: ubuntu-latest
+    container:
+      image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
+      env:
+        VLLM_LOGGING_LEVEL: ERROR
+        VLLM_USE_MODELSCOPE: True
+    strategy:
+      matrix:
+        vllm_version: [main, v0.9.1]
+    steps:
+      - name: Install packages
+        run: |
+          apt-get update -y
+          apt-get install -y python3-pip git vim wget net-tools gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: ${{ matrix.vllm_version }}
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty python3 -m pip install . --extra-index https://download.pytorch.org/whl/cpu/
+          python3 -m pip uninstall -y triton
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
+          python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
+          python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
+
+      - name: Run unit test for V1 Engine
+        env:
+          VLLM_USE_V1: 1
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          TORCH_DEVICE_BACKEND_AUTOLOAD: 0
+        run: |
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
+          pytest -sv tests/unit
+
   e2e:
     needs: [lint]
     if: ${{ needs.lint.result == 'success' }}
@@ -122,7 +172,7 @@
       matrix:
         os: [linux-arm64-npu-1]
         vllm_version: [main, v0.9.1]
-    name: vLLM Ascend test
+    name: singlecard e2e test
     runs-on: ${{ matrix.os }}
     container:
       # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
@@ -168,53 +218,47 @@ jobs:
           pip install -r requirements-dev.txt
          pip install -v -e .
 
-      - name: Run vllm-project/vllm-ascend test for V1 Engine
+      - name: Run e2e test for V1 Engine
         env:
           VLLM_USE_V1: 1
           VLLM_WORKER_MULTIPROC_METHOD: spawn
           VLLM_USE_MODELSCOPE: True
         run: |
-          pytest -sv tests/singlecard/test_offline_inference.py
+          pytest -sv tests/e2e/singlecard/test_offline_inference.py
           # TODO: switch hf to modelscope
           VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/singlecard/test_ilama_lora.py
+          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
           # TODO(sss): guided decoding doesn't work, fix it later
-          # pytest -sv tests/singlecard/test_guided_decoding.py
-          # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
-          pytest -sv tests/singlecard/test_ascend_config.py
-          pytest -sv tests/singlecard/test_camem.py
-          pytest -sv tests/singlecard/ \
-          --ignore=tests/singlecard/test_offline_inference.py \
-          --ignore=tests/singlecard/test_ilama_lora.py \
-          --ignore=tests/singlecard/test_guided_decoding.py \
-          --ignore=tests/singlecard/test_ascend_config.py \
-          --ignore=tests/singlecard/test_camem.py
+          # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+          pytest -sv tests/e2e/singlecard/test_camem.py
+          pytest -sv tests/e2e/singlecard/ \
+          --ignore=tests/e2e/singlecard/test_offline_inference.py \
+          --ignore=tests/e2e/singlecard/test_ilama_lora.py \
+          --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+          --ignore=tests/e2e/singlecard/test_camem.py
 
-      - name: Run vllm-project/vllm-ascend test on V0 engine
+      - name: Run e2e test on V0 engine
         if: ${{ github.event_name == 'schedule' }}
         env:
           VLLM_USE_V1: 0
           VLLM_USE_MODELSCOPE: True
         run: |
-          pytest -sv tests/singlecard/test_offline_inference.py
+          pytest -sv tests/e2e/singlecard/test_offline_inference.py
           # TODO: switch hf to modelscope
           VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/singlecard/test_ilama_lora.py
+          pytest -sv tests/e2e/singlecard/test_ilama_lora.py
           # guided decoding doesn't work, fix it later
-          # pytest -sv tests/singlecard/test_guided_decoding.py
-          pytest -sv tests/singlecard/test_camem.py
-          # test_ascend_config.py should be ran separately because it will regenerate the global config many times.
-          pytest -sv tests/singlecard/test_ascend_config.py
-          pytest -sv tests/singlecard/test_prompt_embedding.py
-          pytest -sv tests/singlecard/ \
-          --ignore=tests/singlecard/test_offline_inference.py \
-          --ignore=tests/singlecard/test_ilama_lora.py \
-          --ignore=tests/singlecard/test_guided_decoding.py \
-          --ignore=tests/singlecard/test_camem.py \
-          --ignore=tests/singlecard/test_ascend_config.py \
-          --ignore=tests/singlecard/test_prompt_embedding.py \
-          --ignore=tests/singlecard/core/test_ascend_scheduler.py \
-          --ignore=tests/singlecard/core/test_ascend_scheduler_e2e.py
+          # pytest -sv tests/e2e/singlecard/test_guided_decoding.py
+          pytest -sv tests/e2e/singlecard/test_camem.py
+          pytest -sv tests/e2e/singlecard/test_prompt_embedding.py
+          pytest -sv tests/e2e/singlecard/ \
+          --ignore=tests/e2e/singlecard/test_offline_inference.py \
+          --ignore=tests/e2e/singlecard/test_ilama_lora.py \
+          --ignore=tests/e2e/singlecard/test_guided_decoding.py \
+          --ignore=tests/e2e/singlecard/test_camem.py \
+          --ignore=tests/e2e/singlecard/test_prompt_embedding.py \
+          --ignore=tests/e2e/singlecard/core/test_ascend_scheduler.py \
+          --ignore=tests/e2e/singlecard/core/test_ascend_scheduler_e2e.py
 
   e2e-4-cards:
     needs: [e2e]
@@ -224,7 +268,7 @@
       matrix:
         os: [linux-arm64-npu-4]
         vllm_version: [main, v0.9.1]
-    name: vLLM Ascend test
+    name: multicard e2e test
     runs-on: ${{ matrix.os }}
     container:
       # TODO(yikun): Remove m.daocloud.io prefix when infra proxy ready
@@ -279,14 +323,14 @@
         run: |
           # TODO: switch hf to modelscope
           VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
           # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py
 
       - name: Run vllm-project/vllm-ascend test on V0 engine
         if: ${{ github.event_name == 'schedule' }}
@@ -296,11 +340,11 @@
         run: |
           # TODO: switch hf to modelscope
          VLLM_USE_MODELSCOPE=False HF_ENDPOINT=https://hf-mirror.com \
-          pytest -sv tests/multicard/test_ilama_lora_tp2.py
-          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/multicard/test_offline_inference_distributed.py will raise error.
+          pytest -sv tests/e2e/multicard/test_ilama_lora_tp2.py
+          # Fixme: run VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py will raise error.
           # To avoid oom, we need to run the test in a single process.
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
-          pytest -sv tests/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
-          pytest -sv tests/multicard/ --ignore=tests/multicard/test_ilama_lora_tp2.py --ignore=tests/multicard/test_offline_inference_distributed.py
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_QwQ
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_topk
+          pytest -sv tests/e2e/multicard/test_offline_inference_distributed.py::test_models_distributed_DeepSeek_W8A8
+          pytest -sv tests/e2e/multicard/ --ignore=tests/e2e/multicard/test_ilama_lora_tp2.py --ignore=tests/e2e/multicard/test_offline_inference_distributed.py
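For debugging, the new ut job's steps can also be replayed outside of CI. The sketch below is not part of the commit; it mirrors the run commands shown in the diff above inside the same CANN container image, and it assumes Docker on an x86_64 host with the vllm-ascend checkout mounted at /workspace/vllm-ascend (a mount point chosen here only for illustration):

    # Start the same container image the ut job uses (hypothetical local mount)
    docker run -it --rm -v "$PWD":/workspace/vllm-ascend -w /workspace/vllm-ascend \
        m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10 bash

    # Inside the container: system packages, then vLLM built for the "empty" device target
    apt-get update -y && apt-get install -y python3-pip git gcc g++ cmake libnuma-dev
    git clone --branch v0.9.1 https://github.com/vllm-project/vllm.git /workspace/vllm-empty   # or --branch main
    cd /workspace/vllm-empty
    VLLM_TARGET_DEVICE=empty python3 -m pip install . --extra-index https://download.pytorch.org/whl/cpu/
    python3 -m pip uninstall -y triton

    # Install vllm-ascend and run the unit tests with the same env as the workflow step
    cd /workspace/vllm-ascend
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
    python3 -m pip install -r requirements-dev.txt --extra-index https://download.pytorch.org/whl/cpu/
    python3 -m pip install -v . --extra-index https://download.pytorch.org/whl/cpu/
    VLLM_USE_V1=1 VLLM_WORKER_MULTIPROC_METHOD=spawn TORCH_DEVICE_BACKEND_AUTOLOAD=0 \
        pytest -sv tests/unit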

.github/workflows/vllm_ascend_test_long_term.yaml

Lines changed: 6 additions & 6 deletions
@@ -96,12 +96,12 @@ jobs:
         run: |
           if [[ "${{ matrix.os }}" == "linux-arm64-npu-1" ]]; then
             # spec decode test
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
             # TODO: revert me when test_v1_spec_decode.py::test_ngram_correctness is fixed
-            # VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_v1_spec_decode.py
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
-            pytest -sv tests/long_term/spec_decode --ignore=tests/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
-            pytest -sv tests/long_term/test_accuracy.py
+            # VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_v1_spec_decode.py
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py # it needs a clean process
+            pytest -sv tests/e2e/long_term/spec_decode --ignore=tests/e2e/long_term/spec_decode/e2e/test_mtp_correctness.py --ignore=tests/e2e/long_term/spec_decode/e2e/test_v1_spec_decode.py --ignore=tests/e2e/long_term/spec_decode/e2e/test_v1_mtp_correctness.py
+            pytest -sv tests/e2e/long_term/test_accuracy.py
           else
-            VLLM_USE_MODELSCOPE=True pytest -sv tests/long_term/test_deepseek_v2_lite_tp2_accuracy.py
+            VLLM_USE_MODELSCOPE=True pytest -sv tests/e2e/long_term/test_deepseek_v2_lite_tp2_accuracy.py
           fi

format.sh

Lines changed: 1 addition & 1 deletion
@@ -273,7 +273,7 @@ echo 'vllm-ascend isort: Done'
 # Clang-format section
 # Exclude some files for formatting because they are vendored
 CLANG_FORMAT_EXCLUDES=(
-    'csrc/kernels/pos_encoding_kernels.cpp' 'csrc/kernels/advance_step.cpp' 'csrc/torch_binding.cpp' 'csrc/ops.h'
+    'csrc/kernels/pos_encoding_kernels.cpp' 'csrc/kernels/advance_step.cpp' 'csrc/kernels/get_masked_input_and_mask_kernel.cpp' 'csrc/torch_binding.cpp' 'csrc/ops.h'
 )
 
 # Format specified files with clang-format

tests/long_term/spec_decode/e2e/test_medusa_correctness.py renamed to tests/e2e/long_term/spec_decode/e2e/test_medusa_correctness.py

Lines changed: 2 additions & 2 deletions
@@ -41,9 +41,9 @@
 
 import pytest
 
-from tests.long_term.spec_decode.e2e.conftest import \
+from tests.e2e.long_term.spec_decode.e2e.conftest import \
     run_equality_correctness_test
-from tests.long_term.spec_decode.utils import maybe_enable_chunked_prefill
+from tests.e2e.long_term.spec_decode.utils import maybe_enable_chunked_prefill
 
 # main model
 # lmsys/vicuna-7b-v1.3 was to be used but it's causing

tests/long_term/spec_decode/e2e/test_mlp_correctness.py renamed to tests/e2e/long_term/spec_decode/e2e/test_mlp_correctness.py

Lines changed: 2 additions & 2 deletions
@@ -41,9 +41,9 @@
 from vllm.model_executor.layers.vocab_parallel_embedding import \
     pad_vocab_size  # noqa: F401
 
-from tests.long_term.spec_decode.e2e.conftest import \
+from tests.e2e.long_term.spec_decode.e2e.conftest import \
     run_equality_correctness_test
-from tests.long_term.spec_decode.utils import maybe_enable_chunked_prefill
+from tests.e2e.long_term.spec_decode.utils import maybe_enable_chunked_prefill
 
 # main model
 MAIN_MODEL = "JackFram/llama-160m"
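Taken together with the other renamed files in this commit (only two of the moved test modules are shown in this excerpt), the test tree referenced by the workflows above ends up organized roughly as follows; this is a sketch inferred from the paths in the diffs, not an exhaustive listing:

    tests/
      unit/          # new unit tests, run by the "ut" job on ubuntu-latest
      e2e/
        singlecard/  # formerly tests/singlecard  (singlecard e2e test job)
        multicard/   # formerly tests/multicard   (multicard e2e test job)
        long_term/   # formerly tests/long_term   (long-term workflow)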
