Skip to content

Commit 2e734bb

Browse files
committed
merge
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2 parents a7f0600 + 4a9ce17 commit 2e734bb

File tree

119 files changed

+2321
-1076
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

119 files changed

+2321
-1076
lines changed

.buildkite/nightly-benchmarks/tests/serving-tests.json

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,12 @@
6363
"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",
6464
"disable_log_requests": "",
6565
"tensor_parallel_size": 4,
66-
"swap_space": 16,
67-
"speculative_model": "turboderp/Qwama-0.5B-Instruct",
68-
"num_speculative_tokens": 4,
69-
"speculative_draft_tensor_parallel_size": 1
66+
"swap_space": 16,
67+
"speculative_config": {
68+
"model": "turboderp/Qwama-0.5B-Instruct",
69+
"num_speculative_tokens": 4,
70+
"draft_tensor_parallel_size": 1
71+
}
7072
},
7173
"client_parameters": {
7274
"model": "meta-llama/Meta-Llama-3.1-70B-Instruct",

.buildkite/release-pipeline.yaml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ steps:
33
agents:
44
queue: cpu_queue_postmerge
55
commands:
6-
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag vllm-ci:build-image --target build --progress plain ."
6+
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
77
- "mkdir artifacts"
88
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
99
- "bash .buildkite/upload-wheels.sh"
@@ -14,7 +14,7 @@ steps:
1414
agents:
1515
queue: cpu_queue_postmerge
1616
commands:
17-
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain ."
17+
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
1818
- "mkdir artifacts"
1919
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
2020
- "bash .buildkite/upload-wheels.sh"
@@ -31,7 +31,7 @@ steps:
3131
agents:
3232
queue: cpu_queue_postmerge
3333
commands:
34-
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain ."
34+
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=11.8.0 --tag vllm-ci:build-image --target build --progress plain -f docker/Dockerfile ."
3535
- "mkdir artifacts"
3636
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
3737
- "bash .buildkite/upload-wheels.sh"
@@ -48,7 +48,7 @@ steps:
4848
queue: cpu_queue_postmerge
4949
commands:
5050
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
51-
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain ."
51+
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain -f docker/Dockerfile ."
5252
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
5353

5454
- label: "Build and publish TPU release image"
@@ -57,7 +57,7 @@ steps:
5757
agents:
5858
queue: tpu_queue_postmerge
5959
commands:
60-
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f Dockerfile.tpu ."
60+
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --tag vllm/vllm-tpu:nightly --tag vllm/vllm-tpu:$BUILDKITE_COMMIT --progress plain -f docker/Dockerfile.tpu ."
6161
- "docker push vllm/vllm-tpu:nightly"
6262
- "docker push vllm/vllm-tpu:$BUILDKITE_COMMIT"
6363
plugins:
@@ -82,7 +82,7 @@ steps:
8282
queue: cpu_queue_postmerge
8383
commands:
8484
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
85-
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f Dockerfile.cpu ."
85+
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
8686
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
8787
env:
8888
DOCKER_BUILDKIT: "1"

.buildkite/run-cpu-test-ppc64le.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@ trap remove_docker_container EXIT
1010
remove_docker_container
1111

1212
# Try building the docker image
13-
docker build -t cpu-test -f Dockerfile.ppc64le .
13+
docker build -t cpu-test -f docker/Dockerfile.ppc64le .
1414

.buildkite/run-cpu-test.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ trap remove_docker_container EXIT
1818
remove_docker_container
1919

2020
# Try building the docker image
21-
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f Dockerfile.cpu .
22-
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f Dockerfile.cpu .
21+
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --tag cpu-test-"$BUILDKITE_BUILD_NUMBER" --target vllm-test -f docker/Dockerfile.cpu .
22+
numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" --tag cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 --target vllm-test -f docker/Dockerfile.cpu .
2323

2424
# Run the image, setting --shm-size=4g for tensor parallel.
2525
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \

.buildkite/run-gh200-test.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ python3 use_existing_torch.py
99

1010
# Try building the docker image
1111
DOCKER_BUILDKIT=1 docker build . \
12+
--file docker/Dockerfile \
1213
--target vllm-openai \
1314
--platform "linux/arm64" \
1415
-t gh200-test \

.buildkite/run-hpu-test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
set -ex
66

77
# Try building the docker image
8-
docker build -t hpu-test-env -f Dockerfile.hpu .
8+
docker build -t hpu-test-env -f docker/Dockerfile.hpu .
99

1010
# Setup cleanup
1111
# certain versions of HPU software stack have a bug that can

.buildkite/run-neuron-test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ else
3535
date "+%s" > /tmp/neuron-docker-build-timestamp
3636
fi
3737

38-
docker build -t "${image_name}" -f Dockerfile.neuron .
38+
docker build -t "${image_name}" -f docker/Dockerfile.neuron .
3939

4040
# Setup cleanup
4141
remove_docker_container() {

.buildkite/run-tpu-v1-test.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
set -e
44

55
# Build the docker image.
6-
docker build -f Dockerfile.tpu -t vllm-tpu .
6+
docker build -f docker/Dockerfile.tpu -t vllm-tpu .
77

88
# Set up cleanup.
99
remove_docker_container() { docker rm -f tpu-test || true; }
@@ -21,6 +21,8 @@ docker run --privileged --net host --shm-size=16G -it \
2121
&& python3 -m pip install lm_eval[api]==0.4.4 \
2222
&& export VLLM_USE_V1=1 \
2323
&& export VLLM_XLA_CHECK_RECOMPILATION=1 \
24+
&& echo TEST_0 \
25+
&& pytest -v -s /workspace/vllm/tests/v1/tpu/test_perf.py \
2426
&& echo TEST_1 \
2527
&& pytest -v -s /workspace/vllm/tests/tpu/test_compilation.py \
2628
&& echo TEST_2 \

.buildkite/run-xpu-test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ image_name="xpu/vllm-ci:${BUILDKITE_COMMIT}"
88
container_name="xpu_${BUILDKITE_COMMIT}_$(tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"
99

1010
# Try building the docker image
11-
docker build -t ${image_name} -f Dockerfile.xpu .
11+
docker build -t ${image_name} -f docker/Dockerfile.xpu .
1212

1313
# Setup cleanup
1414
remove_docker_container() {

.github/mergify.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ pull_request_rules:
1919
- files~=\.buildkite/
2020
- files~=^cmake/
2121
- files=CMakeLists.txt
22-
- files~=^Dockerfile
22+
- files~=^docker/Dockerfile
2323
- files~=^requirements.*\.txt
2424
- files=setup.py
2525
actions:

0 commit comments

Comments
 (0)