
Commit eba4fb3

Merge branch 'vllm-project:main' into main

2 parents 197981d + a93bed4, commit eba4fb3
36 files changed: +815 -223 lines

.github/workflows/vllm_ascend_test.yaml

Lines changed: 1 addition & 1 deletion

@@ -43,7 +43,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-arm64-npu-1, linux-arm64-npu-4]
-        vllm_version: [main, v0.8.5.post1]
+        vllm_version: [main, v0.9.0]
     concurrency:
       group: >
         ${{

.github/workflows/vllm_ascend_test_long_term.yaml

Lines changed: 1 addition & 1 deletion

@@ -41,7 +41,7 @@ jobs:
     strategy:
       max-parallel: 2
       matrix:
-        vllm_version: [main, v0.8.5.post1]
+        vllm_version: [main, v0.9.0]
     name: vLLM Ascend long term test
     runs-on: linux-arm64-npu-1
     container:

.github/workflows/vllm_ascend_test_pd.yaml

Lines changed: 5 additions & 7 deletions

@@ -40,7 +40,7 @@ jobs:
     if: ${{ contains(github.event.pull_request.labels.*.name, 'pd-test') && contains(github.event.pull_request.labels.*.name, 'ready-for-test') || github.event_name == 'schedule' }}
     strategy:
       matrix:
-        vllm_verison: [main, v0.8.5.post1]
+        vllm_verison: [main, v0.9.0]
     name: vLLM Ascend prefilling decoding disaggregation test
     runs-on: linux-arm64-npu-static-8

@@ -55,12 +55,6 @@ jobs:
       options: >-
         --device /dev/davinci0
         --device /dev/davinci1
-        --device /dev/davinci2
-        --device /dev/davinci3
-        --device /dev/davinci4
-        --device /dev/davinci5
-        --device /dev/davinci6
-        --device /dev/davinci7
         --device /dev/davinci_manager
         --device /dev/devmm_svm
         --device /dev/hisi_hdc

@@ -105,3 +99,7 @@ jobs:
       run: |
         pip install -r requirements-dev.txt
         pip install -v -e .
+
+    - name: Run vllm-project/vllm-ascend PD Disaggregation test
+      run: |
+        pytest -sv tests/e2e/pd_disaggreate/test_pd_e2e.py

Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.8.5.post1
+ARG VLLM_TAG=v0.9.0
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \

Dockerfile.openEuler

Lines changed: 1 addition & 1 deletion

@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.8.5.post1
+ARG VLLM_TAG=v0.9.0
 
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.

examples/disaggregated_prefill/disaggregated_prefill_offline.py

Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@
 from multiprocessing import Event, Process
 
 kv_connector_extra_config = {
-    "prompt_device_ips": ["1.2.3.1", "1.2.3.2"],
+    "prefill_device_ips": ["1.2.3.1", "1.2.3.2"],
     "decode_device_ips": ["1.2.3.9", "1.2.3.10"],
     "llmdatadist_comm_port": 26000,
 }
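For context on how the renamed key is consumed, the sketch below shows one plausible way this dict is wired into vLLM's KV-transfer configuration on the prefill side. It is not code from this commit: the connector name "LLMDataDistConnector", the kv_role value, and the exact LLM keyword argument are assumptions about the surrounding example, included only to make the rename concrete.

# Hedged sketch, not part of the diff: pass the renamed "prefill_device_ips"
# key through vLLM's KV-transfer config on a prefill instance.
from vllm import LLM, SamplingParams
from vllm.config import KVTransferConfig

kv_connector_extra_config = {
    "prefill_device_ips": ["1.2.3.1", "1.2.3.2"],  # key renamed in this commit
    "decode_device_ips": ["1.2.3.9", "1.2.3.10"],
    "llmdatadist_comm_port": 26000,
}

kv_transfer_config = KVTransferConfig(
    kv_connector="LLMDataDistConnector",  # placeholder connector name (assumption)
    kv_role="kv_producer",                # prefill side produces KV caches
    kv_connector_extra_config=kv_connector_extra_config,
)

llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct",
          kv_transfer_config=kv_transfer_config)
outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)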

examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py

Lines changed: 9 additions & 2 deletions

@@ -181,6 +181,13 @@ async def handle_request():
 
 
 if __name__ == "__main__":
-    t = start_service_discovery("0.0.0.0", 30001)
-    app.run(host="0.0.0.0", port=10001)
+    import argparse
+    parser = argparse.ArgumentParser(
+        description="args of disaggregated-prefill proxy")
+    parser.add_argument("--http-port", type=int, default=10001)
+    parser.add_argument("--register-port", type=int, default=10002)
+    args = parser.parse_args()
+
+    t = start_service_discovery("0.0.0.0", args.register_port)
+    app.run(host="0.0.0.0", port=args.http_port)
     t.join()
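Usage note (not part of the diff): with these flags the proxy's HTTP port and the service-discovery registration port become configurable instead of being hard-coded to 10001 and 30001. A minimal local launch from the repository root might look like the sketch below; invoking the script via subprocess here is only an illustration.

# Sketch of launching the updated proxy with the flags added in this commit.
# The script path and default port values come from the diff itself.
import subprocess

subprocess.run([
    "python",
    "examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py",
    "--http-port", "10001",      # port served by app.run()
    "--register-port", "10002",  # port passed to start_service_discovery()
], check=True)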

packages.txt

Lines changed: 2 additions & 0 deletions

@@ -1,3 +1,5 @@
 git
 vim
 wget
+jq
+curl

requirements-dev.txt

Lines changed: 2 additions & 0 deletions

@@ -10,3 +10,5 @@ types-jsonschema
 xgrammar
 zmq
 numba
+quart
+types-psutil

tests/compile/test_aclgraph.py

Lines changed: 102 additions & 0 deletions

@@ -0,0 +1,102 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""
+Compare the outputs of vLLM with and without aclgraph.
+
+Run `pytest tests/compile/test_aclgraph.py`.
+"""
+
+import os
+
+import pytest
+import torch
+from vllm import LLM, SamplingParams
+
+from tests.conftest import VllmRunner
+from tests.model_utils import check_outputs_equal
+from vllm_ascend.utils import vllm_version_is
+
+MODELS = ["Qwen/Qwen2.5-0.5B-Instruct"]
+
+
+@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
+                    reason="aclgraph only support on v1")
+@pytest.mark.skipif(
+    (vllm_version_is("0.8.5") or vllm_version_is("0.8.5.post1")),
+    reason="aclgraph not supported in v0.8.5 and v0.8.5.post1")
+@pytest.mark.parametrize("model", MODELS)
+@pytest.mark.parametrize("max_tokens", [32])
+def test_models(
+    model: str,
+    max_tokens: int,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    with monkeypatch.context() as m:
+        prompts = [
+            "Hello, my name is", "The president of the United States is",
+            "The capital of France is", "The future of AI is"
+        ]
+
+        # aclgraph only support on v1
+        m.setenv("VLLM_USE_V1", "1")
+
+        sampling_params = SamplingParams(max_tokens=max_tokens,
+                                         temperature=0.0)
+        # TODO: change to use vllmrunner when the registry of custom op is solved
+        # while running pytest
+        vllm_model = LLM(model)
+        vllm_aclgraph_outputs = vllm_model.generate(prompts, sampling_params)
+        del vllm_model
+        torch.npu.empty_cache()
+
+        vllm_model = LLM(model, enforce_eager=True)
+        vllm_eager_outputs = vllm_model.generate(prompts, sampling_params)
+        del vllm_model
+        torch.npu.empty_cache()
+
+    vllm_aclgraph_outputs_list = []
+    for output in vllm_aclgraph_outputs:
+        vllm_aclgraph_outputs_list.append(
+            (output.outputs[0].index, output.outputs[0].text))
+
+    vllm_eager_outputs_list = []
+    for output in vllm_eager_outputs:
+        vllm_eager_outputs_list.append(
+            (output.outputs[0].index, output.outputs[0].text))
+
+    check_outputs_equal(
+        outputs_0_lst=vllm_eager_outputs_list,
+        outputs_1_lst=vllm_aclgraph_outputs_list,
+        name_0="vllm_eager_outputs",
+        name_1="vllm_aclgraph_outputs",
+    )
+
+
+@pytest.mark.skipif(os.getenv("VLLM_USE_V1") == "0",
+                    reason="aclgraph only support on v1")
+@pytest.mark.skipif(
+    (vllm_version_is("0.8.5") or vllm_version_is("0.8.5.post1")),
+    reason="aclgraph not supported in v0.8.5 and v0.8.5.post1")
+def test_deepseek_raises_error(monkeypatch: pytest.MonkeyPatch) -> None:
+    with monkeypatch.context() as m:
+        m.setenv("VLLM_USE_MODELSCOPE", "True")
+        m.setenv("VLLM_USE_V1", "1")
+        with pytest.raises(NotImplementedError) as excinfo:
+            VllmRunner("deepseek-ai/DeepSeek-V2-Lite-Chat",
+                       max_model_len=1024,
+                       enforce_eager=False)
+        assert "ACL Graph does not support deepseek" in str(excinfo.value)
