2929 types : [ labeled ]
3030 workflow_dispatch :
3131 inputs :
32- vllm-version :
33- description : ' vllm version:'
34- required : true
35- type : choice
36- # Please also update this when bump matched version
37- # Current supported vLLM versions
38- options :
39- - main
40- - v0.10.0
41- - v0.9.1
42- - v0.7.3
4332 vllm-ascend-version :
44- description : ' vllm-ascend version :'
33+ description : ' vllm-ascend:'
4534 required : true
4635 type : choice
36+ # Currently supported vllm-ascend versions
4737 options :
38+ - latest
4839 - main
49- - v0.9.1-dev
50- - v0.7.3-dev
51- models :
52- description : ' model:'
53- required : true
54- type : choice
55- options :
56- - all
57- - Qwen/Qwen2.5-VL-7B-Instruct
58- - Qwen/Qwen3-8B-Base
59- - Qwen/Qwen3-30B-A3B
60- default : ' all'
40+ default : main
6141
6242# Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
6343# declared as "shell: bash -el {0}" on steps that need to be properly activated.
@@ -76,58 +56,27 @@ jobs:
7656 # Runs for PRs labeled 'accuracy-test' and 'ready-for-test', or on workflow_dispatch / schedule events
7757 if : >-
7858 ${{
79- (contains(github.event.pull_request.labels.*.name, 'accuracy-test') ||
80- contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
81- contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') ||
82- contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
59+ contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
8360 contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
8461 github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
8562 }}
86- runs-on : >-
87- ${{
88- (matrix.model_name == 'Qwen/Qwen3-30B-A3B' && 'linux-aarch64-a2-2') ||
89- 'linux-aarch64-a2-1'
90- }}
63+ runs-on : ${{ matrix.runner }}
9164 strategy :
9265 matrix :
93- # the accuracy test will run:
94- # 1. workflow_dispatch with models input
95- # - all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
96- # - specified but not all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
97- # 2. PR labeled with "*-accuracy-test"
98- # - accuracy-test: Qwen/Qwen3-8B-Base, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-30B-A3B
99- # - dense-accuracy-test: Qwen/Qwen3-8B-Base
100- # - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
101- # - moe-accuracy-test: Qwen/Qwen3-30B-A3B
102- model_name : ${{ fromJSON(
103- (github.event_name == 'schedule' &&
104- ' ["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]' ) ||
105- (github.event.inputs.models == 'all' &&
106- ' ["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]' ) ||
107- (github.event.inputs.models == 'Qwen/Qwen3-30B-A3B' &&
108- ' ["Qwen/Qwen3-30B-A3B"]' ) ||
109- (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
110- ' ["Qwen/Qwen2.5-VL-7B-Instruct"]' ) ||
111- (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
112- ' ["Qwen/Qwen3-8B-Base"]' ) ||
113- contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
114- ' ["Qwen/Qwen3-8B-Base","Qwen/Qwen2.5-VL-7B-Instruct", "Qwen/Qwen3-30B-A3B"]' ||
115- contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
116- ' ["Qwen/Qwen3-8B-Base"]' ||
117- contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
118- ' ["Qwen/Qwen2.5-VL-7B-Instruct"]' ||
119- contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') &&
120- ' ["Qwen/Qwen3-30B-A3B"]'
121- ) }}
122-
66+ include :
67+ - model_name : Qwen3-8B-Base
68+ runner : linux-aarch64-a2-1
69+ - model_name : Qwen2.5-VL-7B-Instruct
70+ runner : linux-aarch64-a2-1
71+ - model_name : Qwen3-30B-A3B
72+ runner : linux-aarch64-a2-2
12373 fail-fast : false
74+
12475 name : ${{ matrix.model_name }} accuracy
12576 container :
12677 image : swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
12778 env :
128- DATASET_SOURCE : ModelScope
12979 VLLM_USE_MODELSCOPE : True
130- USE_MODELSCOPE_HUB : 1
13180 # 1. If a version is specified (workflow_dispatch), run the accuracy test on that branch
13281 # 2. If no version is specified (labeled PR), run the accuracy test on the default ref:
13382 # The branch, tag or SHA to checkout. When checking out the repository that
@@ -139,10 +88,10 @@ jobs:
13988 - name : Checkout repository
14089 uses : actions/checkout@v4
14190
142- - name : Check npu and CANN info
91+ - name : Set model name as output
92+ id : set_output
14393 run : |
144- npu-smi info
145- cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
94+ echo "model_name=${{ matrix.model_name }}" >> $GITHUB_OUTPUT
14695
14796 - name : Config mirrors
14897 run : |
@@ -161,19 +110,19 @@ jobs:
161110 uses : actions/checkout@v4
162111 with :
163112 repository : vllm-project/vllm
113+ ref : v0.10.0
164114 path : ./vllm-empty
165- # Please also update this when bump matched version
166- ref : ${{ github.event.inputs.vllm-version || 'v0.10.0' }}
167115
168116 - name : Install vllm-project/vllm from source
169117 working-directory : ./vllm-empty
170- run : VLLM_TARGET_DEVICE=empty pip install -e .
118+ run : |
119+ VLLM_TARGET_DEVICE=empty pip install -e .
171120
172121 - name : Resolve vllm-ascend version
173122 run : |
174123 VERSION_INPUT="${{ github.event.inputs.vllm-ascend-version }}"
175124
176- if [[ "$VERSION_INPUT" == "main " ]]; then
125+ if [[ "$VERSION_INPUT" == "latest " ]]; then
177126 TAGS=$(git ls-remote --tags --sort=-v:refname https://github.com/vllm-project/vllm-ascend "v*" | cut -f2 | sed 's|refs/tags/||')
178127 LATEST_TAG=$(echo "$TAGS" | head -n1)
179128 if [[ -z "$LATEST_TAG" ]]; then
@@ -199,8 +148,8 @@ jobs:
199148 PIP_EXTRA_INDEX_URL : https://mirrors.huaweicloud.com/ascend/repos/pypi
200149 run : |
201150 pip install -r requirements-dev.txt
202- pip install -v -e .
203-
151+ pip install -v -e .
152+
204153 - name : Get vLLM commit hash and URL
205154 working-directory : ./vllm-empty
206155 run : |
@@ -213,15 +162,6 @@ jobs:
213162 VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
214163 echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
215164
216- - name : Print resolved hashes
217- run : |
218- echo "vLLM : ${{ env.VLLM_COMMIT }}"
219- echo "vLLM-Ascend: ${{ env.VLLM_ASCEND_COMMIT }}"
220-
221- - name : Install lm-eval, ray, and datasets
222- run : |
223- pip install lm-eval==0.4.8
224-
225165 - name : Collect version info
226166 run : |
227167 for dir in /usr/local/Ascend/ascend-toolkit/*; do
@@ -242,37 +182,27 @@ jobs:
242182 pip show torch_npu | grep "Version:" | awk '{print "GHA_TORCH_NPU_VERSION="$2}'
243183 pip show vllm | grep "Version:" | awk '{print "GHA_VLLM_VERSION="$2}' | sed 's/+.*//'
244184 } >> "$GITHUB_ENV"
245-
246- - name : Print versions
247- run : |
248- echo "CANN: ${{ env.GHA_CANN_VERSION }}"
249- echo "Torch NPU: ${{ env.GHA_TORCH_NPU_VERSION }}"
250- echo "Torch: ${{ env.GHA_TORCH_VERSION }}"
251- echo "vLLM: ${{ env.GHA_VLLM_VERSION }}"
252- echo "vLLM Ascend: ${{ env.GHA_VLLM_ASCEND_VERSION }}"
253185
254- - name : Run Accuracy Test
186+ - name : Run accuracy test
255187 id : report
256- working-directory : ./benchmarks
257188 env :
258- PYTORCH_NPU_ALLOC_CONF : max_split_size_mb:256
189+ VLLM_WORKER_MULTIPROC_METHOD : spawn
190+ VLLM_USE_MODELSCOPE : True
191+ VLLM_VERSION : ${{ env.GHA_VLLM_VERSION }}
192+ VLLM_COMMIT : ${{ env.VLLM_COMMIT }}
193+ VLLM_ASCEND_VERSION : ${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}
194+ VLLM_ASCEND_COMMIT : ${{ env.VLLM_ASCEND_COMMIT }}
195+ CANN_VERSION : ${{ env.GHA_CANN_VERSION }}
196+ TORCH_VERSION : ${{ env.GHA_TORCH_VERSION }}
197+ TORCH_NPU_VERSION : ${{ env.GHA_TORCH_NPU_VERSION }}
259198 run : |
260199 model_base_name=$(basename ${{ matrix.model_name }})
261200 markdown_name="${model_base_name}"
262- echo "markdown_name=$markdown_name"
263201 echo "markdown_name=$markdown_name" >> $GITHUB_OUTPUT
264- mkdir -p ./accuracy
265-
266- python ./scripts/run_accuracy.py \
267- --model "${{ matrix.model_name }}" \
268- --output "./accuracy/${markdown_name}.md" \
269- --vllm_ascend_version "${{ env.GHA_VLLM_ASCEND_VERSION || github.ref }}" \
270- --cann_version "${{ env.GHA_CANN_VERSION }}" \
271- --torch_npu_version "${{ env.GHA_TORCH_NPU_VERSION }}" \
272- --torch_version "${{ env.GHA_TORCH_VERSION }}" \
273- --vllm_version "${{ env.GHA_VLLM_VERSION }}" \
274- --vllm_commit "${{ env.VLLM_COMMIT }}" \
275- --vllm_ascend_commit "${{ env.VLLM_ASCEND_COMMIT }}" \
202+ mkdir -p ./benchmarks/accuracy
203+ pytest -sv ./tests/e2e/singlecard/models/test_lm_eval_correctness.py \
204+ --config ./tests/e2e/singlecard/models/configs/${{ matrix.model_name }}.yaml \
205+ --report_output ./benchmarks/accuracy/${model_base_name}.md
276206
277207 - name : Generate step summary
278208 if : ${{ always() }}
@@ -284,19 +214,7 @@ jobs:
284214 SAFE_VLLM_ASCEND_VERSION="${GHA_VLLM_ASCEND_VERSION//\//-}"
285215 echo "SAFE_VLLM_ASCEND_VERSION=$SAFE_VLLM_ASCEND_VERSION" >> "$GITHUB_ENV"
286216
287- - name : Check report first line for failure
288- id : check_report
289- run : |
290- REPORT_PATH="./benchmarks/accuracy/${{ steps.report.outputs.markdown_name }}.md"
291- echo "Scanning $REPORT_PATH for ❌ …"
292- if grep -q '❌' "$REPORT_PATH"; then
293- echo "contains_fail=true" >> $GITHUB_OUTPUT
294- else
295- echo "contains_fail=false" >> $GITHUB_OUTPUT
296- fi
297-
298217 - name : Upload Report
299- if : ${{ github.event_name == 'workflow_dispatch' && steps.check_report.outputs.contains_fail == 'false' }}
300218 uses : actions/upload-artifact@v4
301219 with :
302220 name : " report-${{ env.SAFE_VLLM_ASCEND_VERSION }}-${{ steps.report.outputs.markdown_name }}"
@@ -305,20 +223,24 @@ jobs:
305223 retention-days : 90
306224 overwrite : true
307225
226+ outputs :
227+ model_name : ${{ steps.set_output.outputs.model_name }}
228+
308229 create_pr :
309230 runs-on : ubuntu-latest
310231 needs : accuracy_tests
311- if : ${{ github.event_name == 'workflow_dispatch' }}
232+ if : ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.vllm-ascend-version == 'latest' }}
312233 env :
313234 UPSTREAM_REPO : vllm-project/vllm-ascend
235+
314236 steps :
315237 - name : Checkout repository
316238 uses : actions/checkout@v4
317239 with :
318240 repository : vllm-ascend-ci/vllm-ascend
319241 token : ${{ secrets.PAT_TOKEN }}
320242 ref : main
321-
243+
322244 - name : Add upstream remote
323245 run : |
324246 git remote add upstream https://github.com/${{ env.UPSTREAM_REPO }}.git
@@ -350,7 +272,7 @@ jobs:
350272 find ./docs/source/developer_guide/evaluation/accuracy_report -maxdepth 1 -type f -name '*.md' ! -name 'index.md' -delete
351273 find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 2 -type f -name '*.md' -exec mv -f {} ./docs/source/developer_guide/evaluation/accuracy_report \;
352274 find ./docs/source/developer_guide/evaluation/accuracy_report -mindepth 1 -type d -empty -delete
353-
275+
354276 - name : Update accuracy_report/index.md
355277 run : |
356278 REPORT_DIR="./docs/source/developer_guide/evaluation/accuracy_report"
@@ -390,16 +312,10 @@ jobs:
390312 head: `vllm-ascend-ci:${{ env.BRANCH_NAME }}`,
391313 base: '${{ github.event.inputs.vllm-ascend-version }}',
392314 title: `[Doc] Update accuracy reports for ${{ github.event.inputs.vllm-ascend-version }}`,
393- body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for:
394- ${{
395- github.event.inputs.models == 'all'
396- && 'All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)'
397- || github.event.inputs.models
398- }}
399-
400- - [Workflow run][1]
315+ body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for: All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)
401316
402- [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
317+ - [Workflow run][1]
318+
319+ [1]: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}`
403320 });
404321 core.info(`Created PR #${pr.data.number}`);
405-
0 commit comments