Commit 5605150

add v0.8.3 test
Signed-off-by: wangli <wangli858794774@gmail.com>
1 parent c55b224 commit 5605150

File tree: 3 files changed (+245 −78 lines)


.github/workflows/vllm_ascend_test_main.yaml

Lines changed: 16 additions & 35 deletions
@@ -15,7 +15,7 @@
 # limitations under the License.
 #

-name: 'e2e test'
+name: 'e2e test main'

 on:
   pull_request:
@@ -72,7 +72,6 @@ jobs:
         uses: actions/checkout@v4
         with:
           repository: vllm-project/vllm
-          fetch-depth: 0
           path: ./vllm-empty

       - name: Install vllm-project/vllm from source
@@ -100,35 +99,22 @@ jobs:

           pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

-      - name: Run vllm-project/vllm-ascend test
+      - name: Run vllm-project/vllm-ascend test on V0 engine
         env:
           VLLM_USE_V1: 0
           HF_ENDPOINT: https://hf-mirror.com
         run: |
           VLLM_USE_V1=0 pytest -sv -m 'not multinpu' tests

-      - name: Run vllm-project/vllm test for V0 Engine
-        env:
-          VLLM_USE_V1: 0
-          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
-          HF_ENDPOINT: https://hf-mirror.com
-        run: |
-          pytest -sv
-
-      - name: Checkout to vllm 0.8.3
-        working-directory: ./vllm-empty
-        run: |
-          git checkout v0.8.3
-          VLLM_TARGET_DEVICE=empty pip install -e .
-
-      - name: Run vllm-project/vllm-ascend test with vllm v0.8.3
+      - name: Run vllm-project/vllm-ascend test for V1 Engine
         env:
-          VLLM_USE_V1: 0
+          VLLM_USE_V1: 1
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
           HF_ENDPOINT: https://hf-mirror.com
         run: |
-          VLLM_USE_V1=0 pytest -sv -m 'not multinpu' tests
+          pytest -sv -m 'not multinpu' tests

-      - name: Run vllm-project/vllm test for V0 Engine with vllm v0.8.3
+      - name: Run vllm-project/vllm test for V0 Engine
         env:
           VLLM_USE_V1: 0
           PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
@@ -175,7 +161,6 @@
         uses: actions/checkout@v4
         with:
           repository: vllm-project/vllm
-          fetch-depth: 0
           path: ./vllm-empty

       - name: Install vllm-project/vllm from source
@@ -202,22 +187,18 @@
           fi

           pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
-      - name: Run vllm-project/vllm-ascend test
+      - name: Run vllm-project/vllm-ascend test on V0 engine
         env:
           VLLM_USE_V1: 0
-          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
-        run: |
-          pytest -sv -m multinpu tests/
-
-      - name: Checkout to vllm 0.8.3
-        working-directory: ./vllm-empty
+          HF_ENDPOINT: https://hf-mirror.com
         run: |
-          git checkout v0.8.3
-          VLLM_TARGET_DEVICE=empty pip install -e .
+          VLLM_USE_V1=0 pytest -sv -m 'multinpu' tests

-      - name: Run vllm-project/vllm-ascend test with vllm v0.8.3
+      - name: Run vllm-project/vllm-ascend test for V1 Engine
         env:
-          VLLM_USE_V1: 0
-          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
+          VLLM_USE_V1: 1
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          HF_ENDPOINT: https://hf-mirror.com
         run: |
-          pytest -sv -m multinpu tests/
+          pytest -sv -m 'multinpu' tests
+
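Note: both jobs now pick their tests with pytest markers (`-m 'not multinpu'` on the single-NPU runner, `-m 'multinpu'` on the multi-NPU runner). Below is a minimal sketch of how such a marker can be registered, assuming a `conftest.py`; the actual registration file is not part of this commit and the wording of the marker description is hypothetical.

# conftest.py -- hypothetical sketch; the real marker registration is not shown in this diff.
def pytest_configure(config):
    # Register the custom marker so `pytest -m 'multinpu'` / `-m 'not multinpu'`
    # can filter tests without raising an unknown-mark warning.
    config.addinivalue_line(
        "markers", "multinpu: tests that need more than one Ascend NPU")
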
New workflow file ('e2e test v0.8.3')

Lines changed: 200 additions & 0 deletions
@@ -0,0 +1,200 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# This file is a part of the vllm-ascend project.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: 'e2e test v0.8.3'
+
+on:
+  workflow_run:
+    workflows: ["e2e test main"]
+    types:
+      - completed
+
+# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
+# declared as "shell: bash -el {0}" on steps that need to be properly activated.
+# It's used to activate ascend-toolkit environment variables.
+defaults:
+  run:
+    shell: bash -el {0}
+
+concurrency:
+  group: pr-${{ github.event.workflow_run.pull_requests[0].number }}
+  cancel-in-progress: true
+
+jobs:
+  test-singlenpu:
+    name: vLLM Ascend test (single-npu)
+    runs-on: linux-arm64-npu-1 # actionlint-ignore: runner-label
+    container:
+      image: quay.io/ascend/cann:8.0.0-910b-ubuntu22.04-py3.10
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors
+        run: |
+          sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+          apt-get update -y
+          apt install git -y
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install system dependencies
+        run: |
+          apt-get -y install `cat packages.txt`
+          apt-get -y install gcc g++ cmake libnuma-dev
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: v0.8.3
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -e .
+
+      - name: Install pta
+        run: |
+          if [ ! -d /root/.cache/pta ]; then
+            mkdir -p /root/.cache/pta
+          fi
+
+          if [ ! -f /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl ]; then
+            cd /root/.cache/pta
+            rm -rf pytorch_v2.5.1_py310*
+            wget https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz
+            tar -zxvf pytorch_v2.5.1_py310.tar.gz
+          fi
+
+          pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
+
+      - name: Run vllm-project/vllm-ascend test on V0 engine
+        env:
+          VLLM_USE_V1: 0
+          HF_ENDPOINT: https://hf-mirror.com
+        run: |
+          VLLM_USE_V1=0 pytest -sv -m 'not multinpu' tests
+
+      - name: Run vllm-project/vllm-ascend test for V1 Engine
+        env:
+          VLLM_USE_V1: 1
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          HF_ENDPOINT: https://hf-mirror.com
+        run: |
+          pytest -sv -m 'not multinpu' tests
+
+      - name: Run vllm-project/vllm test for V0 Engine
+        env:
+          VLLM_USE_V1: 0
+          PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
+          HF_ENDPOINT: https://hf-mirror.com
+        run: |
+          pytest -sv
+
+  test-multinpu:
+    name: vLLM Ascend test (multi-npu)
+    runs-on: linux-arm64-npu-4
+    container:
+      image: ascendai/cann:8.0.0-910b-ubuntu22.04-py3.10
+      env:
+        HF_ENDPOINT: https://hf-mirror.com
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+    steps:
+      - name: Check npu and CANN info
+        run: |
+          npu-smi info
+          cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
+
+      - name: Config mirrors
+        run: |
+          # sed -i 's|ports.ubuntu.com|mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
+          pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+      - name: Install system dependencies
+        run: |
+          apt-get update -y
+          apt-get -y install git wget
+
+      - name: Config git
+        run: |
+          git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/
+
+      - name: Checkout vllm-project/vllm-ascend repo
+        uses: actions/checkout@v4
+
+      - name: Install dependencies
+        run: |
+          pip install -r requirements-dev.txt
+
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          ref: v0.8.3
+          path: ./vllm-empty
+
+      - name: Install vllm-project/vllm from source
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_TARGET_DEVICE=empty pip install -e .
+
+      - name: Install vllm-project/vllm-ascend
+        run: |
+          pip install -r requirements-dev.txt
+          pip install -e .
+
+      - name: Install pta
+        run: |
+          if [ ! -d /root/.cache/pta ]; then
+            mkdir -p /root/.cache/pta
+          fi
+
+          if [ ! -f /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl ]; then
+            cd /root/.cache/pta
+            rm -rf pytorch_v2.5.1_py310*
+            wget https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/Daily/v2.5.1/20250320.3/pytorch_v2.5.1_py310.tar.gz
+            tar -zxvf pytorch_v2.5.1_py310.tar.gz
+          fi
+
+          pip install /root/.cache/pta/torch_npu-2.5.1.dev20250320-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
+      - name: Run vllm-project/vllm-ascend test on V0 engine
+        env:
+          VLLM_USE_V1: 0
+          HF_ENDPOINT: https://hf-mirror.com
+        run: |
+          VLLM_USE_V1=0 pytest -sv -m 'multinpu' tests
+
+      - name: Run vllm-project/vllm-ascend test for V1 Engine
+        env:
+          VLLM_USE_V1: 1
+          VLLM_WORKER_MULTIPROC_METHOD: spawn
+          HF_ENDPOINT: https://hf-mirror.com
+        run: |
+          pytest -sv -m 'multinpu' tests
+

tests/test_offline_inference.py

Lines changed: 29 additions & 43 deletions
@@ -35,58 +35,44 @@


 @pytest.mark.parametrize("model", MODELS)
-@pytest.mark.parametrize("use_v1", ["1", "0"])
 @pytest.mark.parametrize("dtype", ["half", "float16"])
 @pytest.mark.parametrize("max_tokens", [5])
-def test_models(model: str, use_v1: str, dtype: str, max_tokens: int,
-                monkeypatch: pytest.MonkeyPatch) -> None:
-    with monkeypatch.context() as m:
-        m.setenv("VLLM_USE_V1", use_v1)
-        if use_v1 == '1':
-            m.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
-        # 5042 tokens for gemma2
-        # gemma2 has alternating sliding window size of 4096
-        # we need a prompt with more than 4096 tokens to test the sliding window
-        prompt = "The following numbers of the sequence " + ", ".join(
-            str(i) for i in range(1024)) + " are:"
-        example_prompts = [prompt]
+def test_models(model: str, dtype: str, max_tokens: int) -> None:
+    # 5042 tokens for gemma2
+    # gemma2 has alternating sliding window size of 4096
+    # we need a prompt with more than 4096 tokens to test the sliding window
+    prompt = "The following numbers of the sequence " + ", ".join(
+        str(i) for i in range(1024)) + " are:"
+    example_prompts = [prompt]

-        with VllmRunner(model,
-                        max_model_len=8192,
-                        dtype=dtype,
-                        enforce_eager=False,
-                        gpu_memory_utilization=0.7) as vllm_model:
-            vllm_model.generate_greedy(example_prompts, max_tokens)
+    with VllmRunner(model,
+                    max_model_len=8192,
+                    dtype=dtype,
+                    enforce_eager=False,
+                    gpu_memory_utilization=0.7) as vllm_model:
+        vllm_model.generate_greedy(example_prompts, max_tokens)


-# Now our pvc reading speed is too slow
-# For faster testing, temporarily uncheck the support for testing large weight models on v1
 @pytest.mark.multinpu
-@pytest.mark.parametrize("use_v1", ["0"])
 @pytest.mark.parametrize("model, distributed_executor_backend", [
     ("Qwen/QwQ-32B", "mp"),
 ])
-def test_models_distributed(vllm_runner, use_v1: str, model: str,
-                            distributed_executor_backend: str,
-                            monkeypatch: pytest.MonkeyPatch) -> None:
-    with monkeypatch.context() as m:
-        m.setenv("VLLM_USE_V1", use_v1)
-        if use_v1 == '1':
-            m.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn")
-        example_prompts = [
-            "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.",
-            "Briefly describe the major milestones in the development of artificial intelligence from 1950 to 2020.",
-            "Compare and contrast artificial intelligence with human intelligence in terms of processing information.",
-        ]
-        dtype = "half"
-        max_tokens = 5
-        with vllm_runner(
-                model,
-                dtype=dtype,
-                tensor_parallel_size=4,
-                distributed_executor_backend=distributed_executor_backend,
-        ) as vllm_model:
-            vllm_model.generate_greedy(example_prompts, max_tokens)
+def test_models_distributed(vllm_runner, model: str,
+                            distributed_executor_backend: str) -> None:
+    example_prompts = [
+        "vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs.",
+        "Briefly describe the major milestones in the development of artificial intelligence from 1950 to 2020.",
+        "Compare and contrast artificial intelligence with human intelligence in terms of processing information.",
+    ]
+    dtype = "half"
+    max_tokens = 5
+    with vllm_runner(
+            model,
+            dtype=dtype,
+            tensor_parallel_size=4,
+            distributed_executor_backend=distributed_executor_backend,
+    ) as vllm_model:
+        vllm_model.generate_greedy(example_prompts, max_tokens)


 if __name__ == "__main__":
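With the `use_v1` parametrization and the monkeypatched environment removed, the engine choice now comes entirely from the CI environment. A small sketch of reproducing the single-NPU V1 step locally against this file follows; the script name is hypothetical, and the test path is the one shown in this commit.

# run_v1_tests.py -- hypothetical sketch mirroring the "V1 Engine" CI step.
import os
import subprocess
import sys

env = dict(os.environ,
           VLLM_USE_V1="1",                       # V1 engine, as in the workflow
           VLLM_WORKER_MULTIPROC_METHOD="spawn",  # required for V1 workers
           HF_ENDPOINT="https://hf-mirror.com")

# Same invocation as the CI step, narrowed to the file touched by this commit.
subprocess.run(
    [sys.executable, "-m", "pytest", "-sv", "-m", "not multinpu",
     "tests/test_offline_inference.py"],
    check=True, env=env)
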
