Skip to content

Commit 3d8e214

Browse files
yewentao256 authored and rtourgeman committed
[CI Test] Add Scheduled Integration Test (vllm-project#27765)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
1 parent 3ebe739 commit 3d8e214

File tree

3 files changed

+141
-0
lines changed

3 files changed

+141
-0
lines changed
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#!/usr/bin/env bash
#
# Scheduled integration test: starts a vLLM server for each all2all backend
# and checks the GSM8K accuracy reported by tests/evals/gsm8k/gsm8k_eval.py
# against a minimum threshold.
#
# Usage: <this script> [THRESHOLD] [NUM_QUESTIONS] [START_PORT]
#   THRESHOLD      minimum acceptable accuracy            (default: 0.25)
#   NUM_QUESTIONS  number of GSM8K questions to evaluate  (default: 1319)
#   START_PORT     port of the first server; incremented by one for every
#                  further backend                        (default: 8010)
# Environment:
#   OUT_DIR        directory for result JSON files (default: /tmp/vllm-scheduled)
set -euxo pipefail

# args: [THRESHOLD] [NUM_QUESTIONS] [START_PORT]
THRESHOLD=${1:-0.25}
NUM_Q=${2:-1319}
PORT=${3:-8010}
OUT_DIR=${OUT_DIR:-/tmp/vllm-scheduled}
mkdir -p "${OUT_DIR}"
10+
11+
#######################################
# Block until the server's /health endpoint answers, for at most 600 seconds.
# Globals:   SERVER_PID (read; used as the default for the pid argument)
# Arguments: $1 - port on 127.0.0.1 the server listens on
#            $2 - optional PID of the server process. When given (or when
#                 SERVER_PID is set) the wait aborts as soon as that process
#                 no longer exists instead of polling until the timeout.
# Returns:   0 once the health check succeeds; non-zero if the server process
#            disappeared or the 600 s timeout expired.
#######################################
wait_for_server() {
  local port=$1
  local pid=${2:-${SERVER_PID:-}}
  # The port and PID are handed to the child shell as positional arguments
  # rather than being spliced into its source text.
  timeout 600 bash -c '
    port=$1
    pid=$2
    until curl -sf "http://127.0.0.1:${port}/health" > /dev/null; do
      if [[ -n "${pid}" ]] && ! kill -0 "${pid}" 2>/dev/null; then
        echo "server process ${pid} exited before becoming healthy" >&2
        exit 1
      fi
      sleep 1
    done' _ "${port}" "${pid}"
}
18+
19+
# Model served in every run, and the values exported one at a time as
# VLLM_ALL2ALL_BACKEND (one server launch per entry).
readonly MODEL="deepseek-ai/DeepSeek-V2-lite"
readonly -a BACKENDS=("deepep_high_throughput" "deepep_low_latency")
21+
22+
#######################################
# Stop the background server, if one is running.
# Sends SIGTERM, polls for up to 20 x 0.5 s for the process to exit, and
# escalates to SIGKILL only when it is still alive afterwards.
# Globals:   SERVER_PID (read)
# Arguments: none
# Returns:   always 0, so it is safe to call directly under `set -e`
#######################################
cleanup() {
  local pid="${SERVER_PID:-}"
  # Nothing to do when no server was started or it has already exited.
  if [[ -z "${pid}" ]] || ! kill -0 "${pid}" 2>/dev/null; then
    return 0
  fi

  kill "${pid}" 2>/dev/null || true
  for _ in {1..20}; do
    kill -0 "${pid}" 2>/dev/null || break
    sleep 0.5
  done

  # Only force-kill a process that ignored SIGTERM; never signal a PID that
  # has already gone away.
  if kill -0 "${pid}" 2>/dev/null; then
    kill -9 "${pid}" 2>/dev/null || true
  fi
  return 0
}
# Make sure the server is torn down on every exit path, including failures.
trap cleanup EXIT
33+
34+
# For every backend: start a fresh server on its own port, run the GSM8K
# evaluation against it, enforce the accuracy threshold, then stop the server.
for BACK in "${BACKENDS[@]}"; do
  VLLM_DEEP_GEMM_WARMUP=skip \
  VLLM_ALL2ALL_BACKEND="${BACK}" \
  vllm serve "${MODEL}" \
    --enforce-eager \
    --tensor-parallel-size 2 \
    --data-parallel-size 2 \
    --enable-expert-parallel \
    --enable-eplb \
    --trust-remote-code \
    --max-model-len 2048 \
    --port "${PORT}" &
  SERVER_PID=$!
  wait_for_server "${PORT}"

  # File-name-safe tag: '/', ':' and spaces in the model id become '_'.
  # Done with parameter expansion so no other character is rewritten.
  TAG=${MODEL//[\/: ]/_}
  OUT="${OUT_DIR}/${TAG}_${BACK}.json"
  python3 tests/evals/gsm8k/gsm8k_eval.py \
    --host http://127.0.0.1 \
    --port "${PORT}" \
    --num-questions "${NUM_Q}" \
    --save-results "${OUT}"

  # Values are passed as argv and the heredoc is quoted, so nothing from the
  # shell is interpolated into the Python source. An explicit exit is used
  # instead of `assert` so the gate still works when assertions are disabled.
  python3 - "${OUT}" "${THRESHOLD}" "${MODEL}" "${BACK}" <<'PY'
import json
import sys

out_path, threshold, model, backend = sys.argv[1:5]
with open(out_path) as f:
    acc = json.load(f)["accuracy"]
print(f"{model} {backend}: accuracy {acc:.3f}")
if not acc >= float(threshold):
    sys.exit(f"{model} {backend} accuracy {acc}")
PY

  cleanup
  SERVER_PID=
  sleep 1
  # Next backend gets the next port.
  PORT=$((PORT+1))
done
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#!/usr/bin/env bash
#
# Scheduled integration test: starts a vLLM server for each all2all backend
# and checks the GSM8K accuracy reported by tests/evals/gsm8k/gsm8k_eval.py
# against a minimum threshold.
#
# Usage: <this script> [THRESHOLD] [NUM_QUESTIONS] [START_PORT]
#   THRESHOLD      minimum acceptable accuracy            (default: 0.8)
#   NUM_QUESTIONS  number of GSM8K questions to evaluate  (default: 1319)
#   START_PORT     port of the first server; incremented by one for every
#                  further backend                        (default: 8020)
# Environment:
#   OUT_DIR        directory for result JSON files (default: /tmp/vllm-scheduled)
set -euxo pipefail

# args: [THRESHOLD] [NUM_QUESTIONS] [START_PORT]
THRESHOLD=${1:-0.8}
NUM_Q=${2:-1319}
PORT=${3:-8020}
OUT_DIR=${OUT_DIR:-/tmp/vllm-scheduled}
mkdir -p "${OUT_DIR}"
10+
11+
#######################################
# Block until the server's /health endpoint answers, for at most 600 seconds.
# Globals:   SERVER_PID (read; used as the default for the pid argument)
# Arguments: $1 - port on 127.0.0.1 the server listens on
#            $2 - optional PID of the server process. When given (or when
#                 SERVER_PID is set) the wait aborts as soon as that process
#                 no longer exists instead of polling until the timeout.
# Returns:   0 once the health check succeeds; non-zero if the server process
#            disappeared or the 600 s timeout expired.
#######################################
wait_for_server() {
  local port=$1
  local pid=${2:-${SERVER_PID:-}}
  # The port and PID are handed to the child shell as positional arguments
  # rather than being spliced into its source text.
  timeout 600 bash -c '
    port=$1
    pid=$2
    until curl -sf "http://127.0.0.1:${port}/health" > /dev/null; do
      if [[ -n "${pid}" ]] && ! kill -0 "${pid}" 2>/dev/null; then
        echo "server process ${pid} exited before becoming healthy" >&2
        exit 1
      fi
      sleep 1
    done' _ "${port}" "${pid}"
}
18+
19+
# Model served in every run, and the values exported one at a time as
# VLLM_ALL2ALL_BACKEND (one server launch per entry).
readonly MODEL="QWen/Qwen3-30B-A3B-FP8"
readonly -a BACKENDS=("deepep_high_throughput" "deepep_low_latency")
21+
22+
#######################################
# Stop the background server, if one is running.
# Sends SIGTERM, polls for up to 20 x 0.5 s for the process to exit, and
# escalates to SIGKILL only when it is still alive afterwards.
# Globals:   SERVER_PID (read)
# Arguments: none
# Returns:   always 0, so it is safe to call directly under `set -e`
#######################################
cleanup() {
  local pid="${SERVER_PID:-}"
  # Nothing to do when no server was started or it has already exited.
  if [[ -z "${pid}" ]] || ! kill -0 "${pid}" 2>/dev/null; then
    return 0
  fi

  kill "${pid}" 2>/dev/null || true
  for _ in {1..20}; do
    kill -0 "${pid}" 2>/dev/null || break
    sleep 0.5
  done

  # Only force-kill a process that ignored SIGTERM; never signal a PID that
  # has already gone away.
  if kill -0 "${pid}" 2>/dev/null; then
    kill -9 "${pid}" 2>/dev/null || true
  fi
  return 0
}
# Make sure the server is torn down on every exit path, including failures.
trap cleanup EXIT
33+
34+
# For every backend: start a fresh server on its own port, run the GSM8K
# evaluation against it, enforce the accuracy threshold, then stop the server.
for BACK in "${BACKENDS[@]}"; do
  VLLM_DEEP_GEMM_WARMUP=skip \
  VLLM_ALL2ALL_BACKEND="${BACK}" \
  vllm serve "${MODEL}" \
    --enforce-eager \
    --tensor-parallel-size 2 \
    --data-parallel-size 2 \
    --enable-expert-parallel \
    --trust-remote-code \
    --max-model-len 2048 \
    --port "${PORT}" &
  SERVER_PID=$!
  wait_for_server "${PORT}"

  # File-name-safe tag: '/', ':' and spaces in the model id become '_'.
  # Done with parameter expansion so no other character is rewritten (the
  # previous `tr` set also replaced every letter 'n' in the model id).
  TAG=${MODEL//[\/: ]/_}
  OUT="${OUT_DIR}/${TAG}_${BACK}.json"
  python3 tests/evals/gsm8k/gsm8k_eval.py \
    --host http://127.0.0.1 \
    --port "${PORT}" \
    --num-questions "${NUM_Q}" \
    --save-results "${OUT}"

  # Values are passed as argv and the heredoc is quoted, so nothing from the
  # shell is interpolated into the Python source. An explicit exit is used
  # instead of `assert` so the gate still works when assertions are disabled.
  python3 - "${OUT}" "${THRESHOLD}" "${MODEL}" "${BACK}" <<'PY'
import json
import sys

out_path, threshold, model, backend = sys.argv[1:5]
with open(out_path) as f:
    acc = json.load(f)["accuracy"]
print(f"{model} {backend}: accuracy {acc:.3f}")
if not acc >= float(threshold):
    sys.exit(f"{model} {backend} accuracy {acc}")
PY

  cleanup
  SERVER_PID=
  sleep 1
  # Next backend gets the next port.
  PORT=$((PORT+1))
done

.buildkite/test-pipeline.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,3 +1234,21 @@ steps:
12341234
- .buildkite/scripts/run-prime-rl-test.sh
12351235
commands:
12361236
- bash .buildkite/scripts/run-prime-rl-test.sh
1237+
1238+
# Optional pipeline step running the DeepSeek-V2-Lite accuracy script on
# 4 H100 GPUs.
- label: DeepSeek V2-Lite Accuracy
  timeout_in_minutes: 60
  gpu: h100
  optional: true
  num_gpus: 4
  working_dir: "/vllm-workspace"
  commands:
    # Script args: THRESHOLD NUM_QUESTIONS START_PORT
    - bash .buildkite/scripts/scheduled_integration_test/deepseek_v2_lite_ep_eplb.sh 0.25 200 8010
1246+
1247+
# Optional pipeline step running the Qwen3-30B-A3B-FP8 accuracy script on
# 4 H100 GPUs.
- label: Qwen3-30B-A3B-FP8-block Accuracy
  timeout_in_minutes: 60
  gpu: h100
  optional: true
  num_gpus: 4
  working_dir: "/vllm-workspace"
  commands:
    # Script args: THRESHOLD NUM_QUESTIONS START_PORT
    - bash .buildkite/scripts/scheduled_integration_test/qwen30b_a3b_fp8_block_ep.sh 0.8 200 8020

0 commit comments

Comments
 (0)