Skip to content

Commit 1708df5

Browse files
committed
some fixes
Signed-off-by: MengqingCao <cmq0113@163.com>
1 parent ae220c5 commit 1708df5

File tree

9 files changed

+136
-41
lines changed

9 files changed

+136
-41
lines changed

.github/workflows/vllm_ascend_test_pd.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,21 @@ jobs:
100100
run: |
101101
pip install -r requirements-dev.txt
102102
pip install -v -e .
103+
# only run the PD disaggregation test when the related code changed
104+
- name: Check for changes in Speculative Decode
105+
if: github.event_name != 'schedule'
106+
id: filter_pd
107+
uses: dorny/paths-filter@v3
108+
with:
109+
filters: |
110+
pd_tests_changed:
111+
- ".github/workflows/vllm_ascend_test_pd.yaml"
112+
- "tests/e2e/pd_disaggreate/**"
113+
- "tests/e2e/run_disagg_pd.sh"
114+
- "vllm_ascend/distributed/**"
115+
- "vllm_ascend/models/deepseek_v2.py"
116+
117+
- name: Run vllm-project/vllm-ascend PD Disaggregation test
118+
if: steps.filter_pd.outputs.pd_tests_changed == 'true' || github.event_name == 'schedule'
119+
run: |
120+
pytest -sv tests/e2e/pd_disaggreate/test_pd_e2e.py

examples/disaggregated_prefill/disaggregated_prefill_offline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from multiprocessing import Event, Process
1414

1515
kv_connector_extra_config = {
16-
"prompt_device_ips": ["1.2.3.1", "1.2.3.2"],
16+
"prefill_device_ips": ["1.2.3.1", "1.2.3.2"],
1717
"decode_device_ips": ["1.2.3.9", "1.2.3.10"],
1818
"llmdatadist_comm_port": 26000,
1919
}

examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,13 @@ async def handle_request():
181181

182182

183183
if __name__ == "__main__":
184-
t = start_service_discovery("0.0.0.0", 30001)
185-
app.run(host="0.0.0.0", port=10001)
184+
import argparse
185+
parser = argparse.ArgumentParser(
186+
description="args of disaggregated-prefill proxy")
187+
parser.add_argument("--http-port", type=int, default=10001)
188+
parser.add_argument("--register-port", type=int, default=10002)
189+
args = parser.parse_args()
190+
191+
t = start_service_discovery("0.0.0.0", args.register_port)
192+
app.run(host="0.0.0.0", port=args.http_port)
186193
t.join()

format.sh

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -173,21 +173,21 @@ spell_check_changed() {
173173
fi
174174
}
175175

176-
echo 'vllm-ascend codespell:'
177-
# Run Codespell
178-
## This flag runs spell check of individual files. --files *must* be the first command line
179-
## arg to use this option.
180-
if [[ "$1" == '--files' ]]; then
181-
spell_check "${@:2}"
182-
# If `--all` is passed, then any further arguments are ignored and the
183-
# entire python directory is linted.
184-
elif [[ "$1" == '--all' ]]; then
185-
spell_check_all
186-
else
187-
# Check spelling only of the files that changed in last commit.
188-
spell_check_changed
189-
fi
190-
echo 'vllm-ascend codespell: Done'
176+
# echo 'vllm-ascend codespell:'
177+
# # Run Codespell
178+
# ## This flag runs spell check of individual files. --files *must* be the first command line
179+
# ## arg to use this option.
180+
# if [[ "$1" == '--files' ]]; then
181+
# spell_check "${@:2}"
182+
# # If `--all` is passed, then any further arguments are ignored and the
183+
# # entire python directory is linted.
184+
# elif [[ "$1" == '--all' ]]; then
185+
# spell_check_all
186+
# else
187+
# # Check spelling only of the files that changed in last commit.
188+
# spell_check_changed
189+
# fi
190+
# echo 'vllm-ascend codespell: Done'
191191

192192

193193
# Lint specified files

requirements-dev.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ types-jsonschema
1010
xgrammar
1111
zmq
1212
quart
13+
types-psutil

tests/e2e/pd_disaggreate/setup_pd.sh

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ function run_prefill_instance() {
3131
local tp_size=$2
3232
local prefill_port=$3
3333
local register_port=$4
34+
local prefill_device_ips=$5
35+
local decode_device_ips=$6
3436

3537
echo "================================"
3638
echo "Testing model: $model_name"
@@ -43,6 +45,8 @@ function run_prefill_instance() {
4345
--arg kv_role "kv_producer" \
4446
--argjson kv_parallel_size 8 \
4547
--arg kv_port "11001" \
48+
--argjson prefill_device_ips "$prefill_device_ips" \
49+
--argjson decode_device_ips "$decode_device_ips" \
4650
--argjson llmdatadist_comm_port "26000" \
4751
--arg proxy_ip "0.0.0.0" \
4852
--argjson proxy_port "$register_port" \
@@ -54,8 +58,8 @@ function run_prefill_instance() {
5458
"kv_parallel_size": $kv_parallel_size,
5559
"kv_port": $kv_port,
5660
"kv_connector_extra_config": {
57-
"prompt_device_ips": ["29.7.130.29"],
58-
"decode_device_ips": ["29.7.186.66"],
61+
"prefill_device_ips": $prefill_device_ips,
62+
"decode_device_ips": $decode_device_ips,
5963
"llmdatadist_comm_port": $llmdatadist_comm_port,
6064
"proxy_ip": $proxy_ip,
6165
"proxy_port": $proxy_port,
@@ -82,13 +86,17 @@ function run_decode_instance() {
8286
local tp_size=$2
8387
local decode_port=$3
8488
local register_port=$4
89+
local prefill_device_ips=$5
90+
local decode_device_ips=$6
8591

8692
KV_CONFIG=$(jq -n \
8793
--arg kv_connector "AscendSimpleConnector" \
8894
--arg kv_buffer_device "npu" \
8995
--arg kv_role "kv_consumer" \
9096
--argjson kv_parallel_size 8 \
9197
--arg kv_port "21001" \
98+
--argjson prefill_device_ips "$prefill_device_ips" \
99+
--argjson decode_device_ips "$decode_device_ips" \
92100
--argjson llmdatadist_comm_port "26000" \
93101
--arg proxy_ip "0.0.0.0" \
94102
--argjson proxy_port "$register_port" \
@@ -100,8 +108,8 @@ function run_decode_instance() {
100108
"kv_parallel_size": $kv_parallel_size,
101109
"kv_port": $kv_port,
102110
"kv_connector_extra_config": {
103-
"prompt_device_ips": ["29.7.130.29"],
104-
"decode_device_ips": ["29.7.186.66"],
111+
"prefill_device_ips": $prefill_device_ips,
112+
"decode_device_ips": $decode_device_ips,
105113
"llmdatadist_comm_port": $llmdatadist_comm_port,
106114
"proxy_ip": $proxy_ip,
107115
"proxy_port": $proxy_port,

tests/e2e/pd_disaggreate/test_pd_e2e.py

Lines changed: 59 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,60 @@
1+
#!/bin/bash
2+
3+
#
4+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
# This file is a part of the vllm-ascend project.
18+
#
19+
120
import os
221
import signal
322
import subprocess
423
import time
524

25+
import psutil
626
import requests
727

8-
PROXY_PORT = 8192
9-
REGISTER_PORT = 8193
28+
29+
def kill_process_and_children(pid):
    """Forcefully kill the process ``pid`` and all of its descendants.

    Descendants are killed first, then the process itself. A process that
    has already exited is skipped, so one vanished child no longer stops
    the remaining children and the parent from being killed.

    Args:
        pid: PID of the root process to terminate.
    """
    try:
        parent = psutil.Process(pid)
        children = parent.children(recursive=True)
    except psutil.NoSuchProcess:
        # The root process is already gone; nothing to clean up.
        return

    for child in children:
        print(f"Killing child process {child.pid}")
        try:
            child.kill()
        except psutil.NoSuchProcess:
            # The child exited between enumeration and kill(); keep going
            # so its siblings and the parent are still cleaned up.
            continue

    print(f"Killing parent process {pid}")
    try:
        parent.kill()
    except psutil.NoSuchProcess:
        # The parent exited on its own once its children were gone.
        pass
40+
41+
42+
def kill_all_vllm_related():
    """Best-effort kill of every process whose command line looks vLLM-related.

    A process is targeted when its joined command line contains "vllm",
    "proxy" or "engine_worker". The current process is always skipped.
    Failures on an individual process are ignored so the sweep continues.
    """
    current_pid = os.getpid()

    for proc in psutil.process_iter(['pid', 'cmdline']):
        if proc.pid == current_pid:
            continue
        # psutil fills 'cmdline' with None when it cannot be read (e.g.
        # access denied or zombie process); treat that as an empty command.
        cmd = ' '.join(proc.info['cmdline'] or [])
        # NOTE(review): plain substring matching is broad ("proxy" matches
        # any unrelated proxy process on the host) — confirm this is
        # acceptable on shared CI machines.
        if "vllm" in cmd or "proxy" in cmd or "engine_worker" in cmd:
            try:
                kill_process_and_children(proc.pid)
            except (psutil.Error, OSError):
                # Cleanup is best-effort: a process we may not signal must
                # not abort the sweep over the remaining processes.
                continue
54+
55+
56+
PROXY_PORT = 10102
57+
DECODE_PORT = 8002
1058

1159
SCRIPT_PATH = os.path.abspath("./tests/e2e/run_disagg_pd.sh")
1260

@@ -28,18 +76,20 @@ def start_and_test_pipeline():
2876
try:
2977
print("Waiting for proxy port to be available...")
3078
wait_for_port(PROXY_PORT, 1200)
79+
wait_for_port(DECODE_PORT, 1200)
3180

3281
# request
33-
prompt = "The future of AI is"
3482
payload = {
35-
"model": "Deepseek/DeepSeek-V2-Lite-Chat",
36-
"prompt": prompt,
83+
"model": "Deepseek",
84+
"prompt": "The future of AI is",
3785
"max_tokens": 64,
3886
"temperature": 0,
3987
}
40-
response = requests.post(f"http://localhost:{PROXY_PORT}/generate",
41-
json=payload,
42-
timeout=10)
88+
response = requests.post(
89+
f"http://localhost:{PROXY_PORT}/v1/completions",
90+
headers={"Content-Type": "application/json"},
91+
json=payload,
92+
timeout=10)
4393
assert response.status_code == 200, f"HTTP failed: {response.status_code}"
4494
result = response.json()
4595
print("Response:", result)
@@ -54,6 +104,7 @@ def start_and_test_pipeline():
54104
proc.wait(timeout=10)
55105
except subprocess.TimeoutExpired:
56106
proc.kill()
107+
kill_all_vllm_related()
57108

58109

59110
def test_disaggregated_pd_pipeline():

tests/e2e/run_disagg_pd.sh

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,19 @@ set -eo errexit
2222
. $(dirname "$0")/common.sh
2323
. $(dirname "$0")/pd_disaggreate/setup_pd.sh
2424

25-
MODEL_NAME="/home/cmq/vllm-workspace/DeepSeek-V2-Lite"
25+
export VLLM_USE_MODELSCOPE="True"
26+
27+
MODEL_NAME="deepseek-ai/DeepSeek-V2-Lite"
28+
# TODO: add tp case
2629
TP_SIZE=1
2730

31+
# TODO: support multi-card
32+
prefill_ip=$(hccn_tool -i 0 -ip -g | grep "ipaddr" | awk -F: '{print $2}' | xargs)
33+
PREFILL_DEVICE_IPS="[\"$prefill_ip\"]"
34+
35+
decode_ip=$(hccn_tool -i 1 -ip -g | grep "ipaddr" | awk -F: '{print $2}' | xargs)
36+
DECODE_DEVICE_IPS="[\"$decode_ip\"]"
37+
2838
_info "====> Start pd disaggregated test"
2939
REGISTER_PORT=10101
3040
PREOXY_PORT=10102
@@ -33,15 +43,15 @@ _info "Started pd disaggregated proxy server"
3343

3444
PREFILL_PROC_NAME="Prefill-instance"
3545
PREFILL_PORT=8001
36-
run_prefill_instance $MODEL_NAME $TP_SIZE $PREFILL_PORT $REGISTER_PORT
37-
_info "Startting prefill instance"
46+
run_prefill_instance $MODEL_NAME $TP_SIZE $PREFILL_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS
47+
_info "Starting prefill instance"
3848

3949
wait_url_ready $PREFILL_PROC_NAME "http://localhost:${PREFILL_PORT}/v1/completions"
4050

4151
DECODE_PROC_NAME="Decode-instance"
4252
DECODE_PORT=8002
43-
run_decode_instance $MODEL_NAME $TP_SIZE $DECODE_PORT $REGISTER_PORT
44-
_info "Startting decode instance"
53+
run_decode_instance $MODEL_NAME $TP_SIZE $DECODE_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS
54+
_info "Starting decode instance"
4555

4656
wait_url_ready $DECODE_PROC_NAME "http://localhost:${DECODE_PORT}/v1/completions"
4757

vllm_ascend/distributed/kv_transfer/simple_pipe.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,22 +61,22 @@ def __init__(
6161
raise NotImplementedError(
6262
"kv_role should be inside [kv_producer, kv_consumer]")
6363

64-
prompt_device_ips = kv_connector_extra_config.get(
65-
"prompt_device_ips", None)
64+
prefill_device_ips = kv_connector_extra_config.get(
65+
"prefill_device_ips", None)
6666
decode_device_ips = kv_connector_extra_config.get(
6767
"decode_device_ips", None)
68-
if prompt_device_ips is None or decode_device_ips is None:
68+
if prefill_device_ips is None or decode_device_ips is None:
6969
raise ValueError(
70-
"Please specify prompt_device_ips and decode_device_ips"
70+
"Please specify prefill_device_ips and decode_device_ips"
7171
"in kv_transfer_config.kv_connector_extra_config")
72-
p_device_num = len(prompt_device_ips)
72+
p_device_num = len(prefill_device_ips)
7373
d_device_num = len(decode_device_ips)
7474
# When number of devices in P and D is not equal,
7575
# we assume that device in D can be mapped to any device in P.
7676
self.p_device_rank = self.rank % p_device_num
7777
self.d_device_rank = self.rank % d_device_num
7878

79-
self.prompt_ip_list = prompt_device_ips
79+
self.prompt_ip_list = prefill_device_ips
8080
self.decode_ip_list = decode_device_ips
8181
self.llmdatadist_comm_port = kv_connector_extra_config.get(
8282
"llmdatadist_comm_port", 26000)

0 commit comments

Comments
 (0)