Skip to content

Commit d8eb97d

Browse files
Merge branch 'vllm-project:main' into whisper-cudagraphs-support
2 parents 6092e13 + 5be7ca1 commit d8eb97d

File tree

1,782 files changed

+158233
-109735
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,782 files changed

+158233
-109735
lines changed

.buildkite/nightly-benchmarks/scripts/convert-results-json-to-markdown.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ def parse_client_command(cmd: str) -> dict[str, Any]:
368368
# The GPUs sometimes come in format of "GPUTYPE\nGPUTYPE\n...",
369369
# we want to turn it into "8xGPUTYPE"
370370
df["GPU"] = df["GPU"].apply(
371-
lambda x: f"{len(x.split('\n'))}x{x.split('\n')[0]}"
371+
lambda x: f"{len(x.splitlines())}x{x.splitlines()[0]}"
372372
)
373373

374374
# get markdown tables

.buildkite/nightly-benchmarks/scripts/launch-server.sh

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -181,18 +181,14 @@ launch_vllm_server() {
181181
if echo "$common_params" | jq -e 'has("fp8")' >/dev/null; then
182182
echo "Key 'fp8' exists in common params. Use neuralmagic fp8 model for convenience."
183183
model=$(echo "$common_params" | jq -r '.neuralmagic_quantized_model')
184-
server_command="python3 \
185-
-m vllm.entrypoints.openai.api_server \
184+
server_command="vllm serve $model \
186185
-tp $tp \
187-
--model $model \
188186
--port $port \
189187
$server_args"
190188
else
191189
echo "Key 'fp8' does not exist in common params."
192-
server_command="python3 \
193-
-m vllm.entrypoints.openai.api_server \
190+
server_command="vllm serve $model \
194191
-tp $tp \
195-
--model $model \
196192
--port $port \
197193
$server_args"
198194
fi

.buildkite/nightly-benchmarks/scripts/run-performance-benchmarks.sh

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -365,8 +365,7 @@ run_serving_tests() {
365365
continue
366366
fi
367367

368-
server_command="$server_envs python3 \
369-
-m vllm.entrypoints.openai.api_server \
368+
server_command="$server_envs vllm serve \
370369
$server_args"
371370

372371
# run the server
@@ -455,11 +454,6 @@ main() {
455454
fi
456455
check_hf_token
457456

458-
# Set to v1 to run v1 benchmark
459-
if [[ "${ENGINE_VERSION:-v0}" == "v1" ]]; then
460-
export VLLM_USE_V1=1
461-
fi
462-
463457
# dependencies
464458
(which wget && which curl) || (apt-get update && apt-get install -y wget curl)
465459
(which jq) || (apt-get update && apt-get -y install jq)

.buildkite/pyproject.toml

Lines changed: 0 additions & 46 deletions
This file was deleted.

.buildkite/release-pipeline.yaml

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ steps:
4848
agents:
4949
queue: cpu_queue_postmerge
5050
commands:
51-
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0+PTX' --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
51+
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
5252
- "mkdir artifacts"
5353
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
5454
- "bash .buildkite/scripts/upload-wheels.sh"
@@ -76,7 +76,7 @@ steps:
7676
queue: arm64_cpu_queue_postmerge
7777
commands:
7878
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
79-
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
79+
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
8080
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)"
8181

8282
# Add job to create multi-arch manifest
@@ -150,11 +150,16 @@ steps:
150150
queue: cpu_queue_postmerge
151151
commands:
152152
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
153-
- "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
154-
- "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT vllm/vllm-openai:nightly"
155-
- "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT vllm/vllm-openai:nightly-$BUILDKITE_COMMIT"
156-
- "docker push vllm/vllm-openai:nightly"
157-
- "docker push vllm/vllm-openai:nightly-$BUILDKITE_COMMIT"
153+
- "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64"
154+
- "docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64"
155+
- "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64 vllm/vllm-openai:nightly-x86_64"
156+
- "docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64 vllm/vllm-openai:nightly-aarch64"
157+
- "docker push vllm/vllm-openai:nightly-x86_64"
158+
- "docker push vllm/vllm-openai:nightly-aarch64"
159+
- "docker manifest create vllm/vllm-openai:nightly vllm/vllm-openai:nightly-x86_64 vllm/vllm-openai:nightly-aarch64 --amend"
160+
- "docker manifest create vllm/vllm-openai:nightly-$BUILDKITE_COMMIT vllm/vllm-openai:nightly-x86_64 vllm/vllm-openai:nightly-aarch64 --amend"
161+
- "docker manifest push vllm/vllm-openai:nightly"
162+
- "docker manifest push vllm/vllm-openai:nightly-$BUILDKITE_COMMIT"
158163
# Clean up old nightly builds (keep only last 14)
159164
- "bash .buildkite/scripts/cleanup-nightly-builds.sh"
160165
plugins:
@@ -163,3 +168,4 @@ steps:
163168
password-env: DOCKERHUB_TOKEN
164169
env:
165170
DOCKER_BUILDKIT: "1"
171+
DOCKERHUB_USERNAME: "vllmbot"

.buildkite/scripts/cleanup-nightly-builds.sh

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,41 @@ set -ex
88
# DockerHub API endpoint for vllm/vllm-openai repository
99
REPO_API_URL="https://hub.docker.com/v2/repositories/vllm/vllm-openai/tags"
1010

11-
# Get DockerHub token from environment
11+
# Get DockerHub credentials from environment
1212
if [ -z "$DOCKERHUB_TOKEN" ]; then
1313
echo "Error: DOCKERHUB_TOKEN environment variable is not set"
1414
exit 1
1515
fi
1616

17+
if [ -z "$DOCKERHUB_USERNAME" ]; then
18+
echo "Error: DOCKERHUB_USERNAME environment variable is not set"
19+
exit 1
20+
fi
21+
22+
# Get DockerHub bearer token
23+
echo "Getting DockerHub bearer token..."
24+
set +x
25+
BEARER_TOKEN=$(curl -s -X POST \
26+
-H "Content-Type: application/json" \
27+
-d "{\"username\": \"$DOCKERHUB_USERNAME\", \"password\": \"$DOCKERHUB_TOKEN\"}" \
28+
"https://hub.docker.com/v2/users/login" | jq -r '.token')
29+
set -x
30+
31+
if [ -z "$BEARER_TOKEN" ] || [ "$BEARER_TOKEN" = "null" ]; then
32+
echo "Error: Failed to get DockerHub bearer token"
33+
exit 1
34+
fi
35+
1736
# Function to get all tags from DockerHub
1837
get_all_tags() {
1938
local page=1
2039
local all_tags=""
2140

2241
while true; do
23-
local response=$(curl -s -H "Authorization: Bearer $DOCKERHUB_TOKEN" \
42+
set +x
43+
local response=$(curl -s -H "Authorization: Bearer $BEARER_TOKEN" \
2444
"$REPO_API_URL?page=$page&page_size=100")
45+
set -x
2546

2647
# Get both last_updated timestamp and tag name, separated by |
2748
local tags=$(echo "$response" | jq -r '.results[] | select(.name | startswith("nightly-")) | "\(.last_updated)|\(.name)"')
@@ -43,7 +64,9 @@ delete_tag() {
4364
echo "Deleting tag: $tag_name"
4465

4566
local delete_url="https://hub.docker.com/v2/repositories/vllm/vllm-openai/tags/$tag_name"
46-
local response=$(curl -s -X DELETE -H "Authorization: Bearer $DOCKERHUB_TOKEN" "$delete_url")
67+
set +x
68+
local response=$(curl -s -X DELETE -H "Authorization: Bearer $BEARER_TOKEN" "$delete_url")
69+
set -x
4770

4871
if echo "$response" | jq -e '.detail' > /dev/null 2>&1; then
4972
echo "Warning: Failed to delete tag $tag_name: $(echo "$response" | jq -r '.detail')"

.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,25 +25,28 @@ function cpu_tests() {
2525

2626
# offline inference
2727
podman exec -it "$container_id" bash -c "
28-
set -e
29-
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m"
28+
set -xve
29+
python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m" >> $HOME/test_basic.log
3030

3131
# Run basic model test
3232
podman exec -it "$container_id" bash -c "
33-
set -e
33+
set -evx
3434
pip install pytest pytest-asyncio einops peft Pillow soundfile transformers_stream_generator matplotlib
3535
pip install sentence-transformers datamodel_code_generator
36-
pytest -v -s tests/models/language/generation/test_bart.py -m cpu_model
36+
37+
# Note: disable Bart until it supports V1
38+
# pytest -v -s tests/models/language/generation/test_bart.py -m cpu_model
3739
pytest -v -s tests/models/language/generation/test_common.py::test_models[False-5-32-openai-community/gpt2]
3840
pytest -v -s tests/models/language/generation/test_common.py::test_models[False-5-32-facebook/opt-125m]
3941
pytest -v -s tests/models/language/generation/test_common.py::test_models[False-5-32-google/gemma-1.1-2b-it]
4042
pytest -v -s tests/models/language/pooling/test_classification.py::test_models[float-jason9693/Qwen2.5-1.5B-apeach]
41-
pytest -v -s tests/models/language/pooling/test_embedding.py -m cpu_model"
43+
# TODO: The test case tests/models/language/pooling/test_embedding.py::test_models[True-ssmits/Qwen2-7B-Instruct-embed-base] fails on ppc64le. Disabling it for the time being.
44+
# pytest -v -s tests/models/language/pooling/test_embedding.py -m cpu_model" >> $HOME/test_rest.log
4245
}
4346

4447
# All of CPU tests are expected to be finished less than 40 mins.
4548

4649
export container_id
4750
export -f cpu_tests
48-
timeout 40m bash -c cpu_tests
51+
timeout 120m bash -c cpu_tests
4952

0 commit comments

Comments
 (0)