
Commit 87c2f30

Merge branch 'main' into gating_topk_softmax
2 parents 7fa72ca + 493768e commit 87c2f30

87 files changed: +7959 additions, -699 deletions


.github/format_pr_body.sh

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+#!/bin/bash
+
+set -eux
+
+# ensure 3 arguments are passed
+if [ "$#" -ne 3 ]; then
+    echo "Usage: $0 <pr_number> <vllm_version> <vllm_commit>"
+    exit 1
+fi
+
+PR_NUMBER=$1
+VLLM_VERSION=$2
+VLLM_COMMIT=$3
+OLD=/tmp/orig_pr_body.txt
+NEW=/tmp/new_pr_body.txt
+
+gh pr view --json body --template "{{.body}}" "${PR_NUMBER}" > "${OLD}"
+cp "${OLD}" "${NEW}"
+
+# Remove "FIX #xxxx (*link existing issues this PR will resolve*)"
+sed -i '/<!--/,/-->/d' "${NEW}"
+sed -i '/- vLLM .*$/d' "${NEW}"
+echo "- vLLM version: $VLLM_VERSION" >> "${NEW}"
+echo "- vLLM main: $VLLM_COMMIT" >> "${NEW}"
+
+# Run this only if ${NEW} is different than ${OLD}
+if ! cmp -s "${OLD}" "${NEW}"; then
+    echo
+    echo "Updating PR body:"
+    echo
+    cat "${NEW}"
+    gh pr edit --body-file "${NEW}" "${PR_NUMBER}"
+else
+    echo "No changes needed"
+fi
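For reference, a local invocation of this helper would look like the sketch below; the PR number, version string, and commit URL are hypothetical placeholders, and an authenticated GitHub CLI (gh) is assumed.

# Hypothetical example invocation (placeholder arguments, authenticated `gh` assumed):
bash .github/format_pr_body.sh 1234 "v0.9.1" "https://github.com/vllm-project/vllm/commit/abc1234"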

.github/workflows/accuracy_test.yaml

Lines changed: 23 additions & 30 deletions
@@ -53,9 +53,9 @@ on:
         type: choice
         options:
           - all
-          - Qwen/Qwen2.5-7B-Instruct
           - Qwen/Qwen2.5-VL-7B-Instruct
           - Qwen/Qwen3-8B-Base
+          - Qwen/Qwen3-30B-A3B
         default: 'all'

 # Bash shells do not use ~/.profile or ~/.bashrc so these shells need to be explicitly
@@ -77,58 +77,57 @@ jobs:
       ${{
       (contains(github.event.pull_request.labels.*.name, 'accuracy-test') ||
       contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') ||
+      contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') ||
       contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test')) &&
       contains(github.event.pull_request.labels.*.name, 'ready-for-test') ||
       github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
       }}
     runs-on: >-
       ${{
-      (matrix.model_name == 'Qwen/Qwen2.5-VL-7B-Instruct' && 'linux-arm64-npu-4') ||
+      (matrix.model_name == 'Qwen/Qwen3-30B-A3B' && 'linux-arm64-npu-4') ||
       'linux-arm64-npu-2'
       }}
     strategy:
       matrix:
-        vllm_use_version: [0, 1]
+        vllm_use_version: [1]
         # the accuracy test will run:
         # 1. workflow_dispatch with models input
-        # - all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
-        # - specified but not all: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
+        # - all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
+        # - specified but not all: Qwen/Qwen3-30B-A3B, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-8B-Base
         # 2. PR labeled with "*-accuracy-test"
-        # - accuracy-test: Qwen/Qwen2.5-7B-Instruct, Qwen/Qwen2.5-VL-7B-Instruct
-        # - dense-accuracy-test: Qwen/Qwen2.5-7B-Instruct
+        # - accuracy-test: Qwen/Qwen3-8B-Base, Qwen/Qwen2.5-VL-7B-Instruct, Qwen/Qwen3-30B-A3B
+        # - dense-accuracy-test: Qwen/Qwen3-8B-Base
         # - vl-accuracy-test: Qwen/Qwen2.5-VL-7B-Instruct
+        # - moe-accuracy-test: Qwen/Qwen3-30B-A3B
         model_name: ${{ fromJSON(
           (github.event_name == 'schedule' &&
-          '["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
+          '["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
           (github.event.inputs.models == 'all' &&
-          '["Qwen/Qwen2.5-7B-Instruct","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
-          (github.event.inputs.models == 'Qwen/Qwen2.5-7B-Instruct' &&
-          '["Qwen/Qwen2.5-7B-Instruct"]') ||
+          '["Qwen/Qwen3-30B-A3B","Qwen/Qwen2.5-VL-7B-Instruct","Qwen/Qwen3-8B-Base"]') ||
+          (github.event.inputs.models == 'Qwen/Qwen3-30B-A3B' &&
+          '["Qwen/Qwen3-30B-A3B"]') ||
           (github.event.inputs.models == 'Qwen/Qwen2.5-VL-7B-Instruct' &&
           '["Qwen/Qwen2.5-VL-7B-Instruct"]') ||
          (github.event.inputs.models == 'Qwen/Qwen3-8B-Base' &&
          '["Qwen/Qwen3-8B-Base"]') ||
          contains(github.event.pull_request.labels.*.name, 'accuracy-test') &&
-          '["Qwen/Qwen3-8B-Base","Qwen/Qwen2.5-VL-7B-Instruct"]' ||
+          '["Qwen/Qwen3-8B-Base","Qwen/Qwen2.5-VL-7B-Instruct", "Qwen/Qwen3-30B-A3B"]' ||
          contains(github.event.pull_request.labels.*.name, 'dense-accuracy-test') &&
          '["Qwen/Qwen3-8B-Base"]' ||
          contains(github.event.pull_request.labels.*.name, 'vl-accuracy-test') &&
-          '["Qwen/Qwen2.5-VL-7B-Instruct"]'
+          '["Qwen/Qwen2.5-VL-7B-Instruct"]' ||
+          contains(github.event.pull_request.labels.*.name, 'moe-accuracy-test') &&
+          '["Qwen/Qwen3-30B-A3B"]'
         ) }}
-        # Remove exclude after https://github.com/vllm-project/vllm-ascend/issues/1044 resolved
-        exclude:
-          - model_name: Qwen/Qwen2.5-VL-7B-Instruct
-            vllm_use_version: 1

       fail-fast: false
     name: ${{ matrix.model_name }} accuracy V${{ matrix.vllm_use_version }}
     container:
       image: m.daocloud.io/quay.io/ascend/cann:8.1.rc1-910b-ubuntu22.04-py3.10
       env:
-        HF_ENDPOINT: https://hf-mirror.com
-        HF_TOKEN: ${{ secrets.HF_TOKEN }}
         DATASET_SOURCE: ModelScope
         VLLM_USE_MODELSCOPE: True
+        USE_MODELSCOPE_HUB: 1
         # 1. If version specified (work_dispatch), do specified branch accuracy test
         # 2. If no version (labeled PR), do accuracy test by default ref:
         # The branch, tag or SHA to checkout. When checking out the repository that
@@ -188,23 +187,19 @@ jobs:
       - name: Get vLLM commit hash and URL
         working-directory: ./vllm-empty
         run: |
-          VLLM_COMMIT=$(git rev-parse HEAD)
+          VLLM_COMMIT=$(git rev-parse --short=7 HEAD)
           echo "VLLM_COMMIT=$VLLM_COMMIT" >> $GITHUB_ENV
-          echo "VLLM_COMMIT_URL=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV

       - name: Get vLLM-Ascend commit hash and URL
         working-directory: ./vllm-ascend
         run: |
-          VLLM_ASCEND_COMMIT=$(git rev-parse HEAD)
+          VLLM_ASCEND_COMMIT=$(git rev-parse --short=7 HEAD)
           echo "VLLM_ASCEND_COMMIT=$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV
-          echo "VLLM_ASCEND_COMMIT_URL=https://github.com/vllm-project/vllm-ascend/commit/$VLLM_ASCEND_COMMIT" >> $GITHUB_ENV

-      - name: Print resolved hashes and URLs
+      - name: Print resolved hashes
         run: |
           echo "vLLM : ${{ env.VLLM_COMMIT }}"
-          echo "vLLM link : ${{ env.VLLM_COMMIT_URL }}"
           echo "vLLM-Ascend: ${{ env.VLLM_ASCEND_COMMIT }}"
-          echo "Ascend link: ${{ env.VLLM_ASCEND_COMMIT_URL }}"

       - name: Install lm-eval, ray, and datasets
         run: |
@@ -263,8 +258,6 @@ jobs:
             --vllm_version "${{ env.GHA_VLLM_VERSION }}" \
             --vllm_commit "${{ env.VLLM_COMMIT }}" \
             --vllm_ascend_commit "${{ env.VLLM_ASCEND_COMMIT }}" \
-            --vllm_commit_url "${{ env.VLLM_COMMIT_URL }}" \
-            --vllm_ascend_commit_url "${{ env.VLLM_ASCEND_COMMIT_URL }}" \
             --vllm_use_v1 "$VLLM_USE_V1"

       - name: Generate step summary
@@ -373,7 +366,7 @@ jobs:
           git push -f origin "${{ env.BRANCH_NAME }}"

       - name: Create PR in upstream via API
-        uses: actions/github-script@v6
+        uses: actions/github-script@v7
         with:
           github-token: ${{ secrets.PAT_TOKEN }}
           script: |
@@ -386,7 +379,7 @@ jobs:
             body: `The accuracy results running on NPU Altlas A2 have changed, updating reports for:
             ${{
               github.event.inputs.models == 'all'
-              && 'All models (Qwen2.5-7B-Instruct, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)'
+              && 'All models (Qwen/Qwen3-30B-A3B, Qwen2.5-VL-7B-Instruct, Qwen3-8B-Base)'
               || github.event.inputs.models
             }}
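The hash-handling change above swaps full SHAs for 7-character short hashes; a quick local illustration (any git checkout, the output values below are hypothetical):

# Full SHA vs. the 7-character short form now exported to $GITHUB_ENV (example output is hypothetical).
git rev-parse HEAD             # e.g. 87c2f30d9c3f... (full 40-character SHA)
git rev-parse --short=7 HEAD   # e.g. 87c2f30 (first 7 characters, used for display)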

.github/workflows/doc_codespell.yaml

Lines changed: 1 addition & 1 deletion
@@ -28,6 +28,6 @@ jobs:
       - name: Run codespell check
         run: |
           CODESPELL_EXCLUDES=('--skip' 'tests/prompts/**,./benchmarks/sonnet.txt,*tests/lora/data/**,build/**,./vllm_ascend.egg-info/**')
-          CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn')
+          CODESPELL_IGNORE_WORDS=('-L' 'CANN,cann,NNAL,nnal,ASCEND,ascend,EnQue,CopyIn,assertIn,rever')

           codespell --toml pyproject.toml "${CODESPELL_EXCLUDES[@]}" "${CODESPELL_IGNORE_WORDS[@]}"

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# This file is a part of the vllm-ascend project.
+#
+
+name: format / pr body
+
+on:
+  # The PR updated when PR opened and push new commits
+  pull_request_target:
+    types: [opened, synchronize]
+    branches:
+      - 'main'
+
+permissions:
+  pull-requests: write
+
+jobs:
+  update-description:
+    name: update vLLM version
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout vllm-project/vllm repo
+        uses: actions/checkout@v4
+        with:
+          repository: vllm-project/vllm
+          path: ./vllm-empty
+
+      - name: Get vLLM version
+        working-directory: ./vllm-empty
+        run: |
+          VLLM_COMMIT=$(git rev-parse HEAD)
+          echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
+
+      - name: Checkout repository
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Set up Python
+        uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
+
+      - name: Get vLLM release version
+        run: |
+          VLLM_VERSION=$(python3 docs/source/conf.py | jq .vllm_version | tr -d '"')
+          echo "VLLM_VERSION=$VLLM_VERSION" >> $GITHUB_ENV
+
+      - name: Update PR description
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          bash .github/format_pr_body.sh "${{ github.event.number }}" "${{ env.VLLM_VERSION }}" "${{ env.VLLM_COMMIT }}"
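Taken together, the workflow and helper script leave a two-line footer at the end of the PR description; with hypothetical placeholder values it would read:

    - vLLM version: v0.9.1
    - vLLM main: https://github.com/vllm-project/vllm/commit/abc1234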

.github/workflows/nightly_benchmarks.yaml

Lines changed: 3 additions & 3 deletions
@@ -69,8 +69,7 @@ jobs:
         --device /dev/devmm_svm
         --device /dev/hisi_hdc
       env:
-        HF_ENDPOINT: https://hf-mirror.com
-        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        VLLM_USE_MODELSCOPE: True
         ES_OM_DOMAIN: ${{ secrets.ES_OM_DOMAIN }}
         ES_OM_AUTHORIZATION: ${{ secrets.ES_OM_AUTHORIZATION }}
         VLLM_USE_V1: ${{ matrix.vllm_use_v1 }}
@@ -115,6 +114,7 @@ jobs:
         env:
           PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi
         run: |
+          pip install "transformers<=4.52.4"
           pip install -e .
           pip install -r benchmarks/requirements-bench.txt

@@ -197,7 +197,7 @@ jobs:
             --commit_title "$commit_title" \
             --created_at "$commit_time_no_tz" \
             --res_dir ./benchmarks/results \
-            --error $ERROR_MSG \
+            --error "$ERROR_MSG" \
             --extra_feat '{"VLLM_USE_V1": "${{ matrix.vllm_use_v1 }}"}'
           rm -rf ./benchmarks/results
           cd -
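A quick sketch of why the quoting change to --error matters; ERROR_MSG below is an illustrative value:

# Unquoted expansion splits the message into separate words; quoting keeps it as one argument.
ERROR_MSG="connection timed out"
printf '[%s]\n' $ERROR_MSG     # prints three bracketed words
printf '[%s]\n' "$ERROR_MSG"   # prints one bracketed message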

.github/workflows/release_whl.yml

Lines changed: 8 additions & 1 deletion
@@ -18,6 +18,9 @@
 name: build / wheel

 on:
+  schedule:
+    # Runs at 23:00 UTC (7:00 AM Beijing) every day
+    - cron: '0 23 * * *'
   pull_request:
     branches:
       - 'main'
@@ -55,7 +58,11 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-24.04, ubuntu-24.04-arm]
-        python-version: ['3.9', '3.10', '3.11']
+        # PR only trigger latest version
+        python-version: ${{ fromJSON(
+          (github.event_name == 'pull_request' && '["3.11"]') ||
+          '["3.9", "3.10", "3.11"]'
+          ) }}
     runs-on: ${{ matrix.os }}
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
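For intuition, a rough bash analogue of the python-version matrix expression above; the event name here is a hypothetical local variable, not how the workflow actually reads it:

# Illustrative only: mirrors the fromJSON() selection logic in plain bash.
EVENT_NAME="pull_request"
if [ "$EVENT_NAME" = "pull_request" ]; then
    PYTHON_VERSIONS='["3.11"]'                  # PRs build only the newest Python
else
    PYTHON_VERSIONS='["3.9", "3.10", "3.11"]'   # scheduled and push builds cover all versions
fi
echo "$PYTHON_VERSIONS"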
