
Commit ca377cf

Use CUDA 12.4 as default for release and nightly wheels (#12098)
1 parent: a31614e · commit: ca377cf

File tree

- .buildkite/release-pipeline.yaml
- .buildkite/upload-wheels.sh
- docs/source/getting_started/installation/gpu/cuda.inc.md
- setup.py

4 files changed: +25 -9 lines

.buildkite/release-pipeline.yaml

Lines changed: 12 additions & 1 deletion
@@ -1,4 +1,15 @@
 steps:
+- label: "Build wheel - CUDA 12.4"
+  agents:
+    queue: cpu_queue_postmerge
+  commands:
+    - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag vllm-ci:build-image --target build --progress plain ."
+    - "mkdir artifacts"
+    - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
+    - "bash .buildkite/upload-wheels.sh"
+  env:
+    DOCKER_BUILDKIT: "1"
+
 - label: "Build wheel - CUDA 12.1"
   agents:
     queue: cpu_queue_postmerge

@@ -37,7 +48,7 @@ steps:
     queue: cpu_queue_postmerge
   commands:
     - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-    - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.1.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain ."
+    - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.4.0 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain ."
     - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"

 - label: "Build and publish TPU release image"

.buildkite/upload-wheels.sh

Lines changed: 8 additions & 2 deletions
@@ -50,8 +50,11 @@ aws s3 cp "$normal_wheel" "s3://vllm-wheels/$BUILDKITE_COMMIT/"
 if [[ $normal_wheel == *"cu118"* ]]; then
     # if $normal_wheel matches cu118, do not upload the index.html
     echo "Skipping index files for cu118 wheels"
+elif [[ $normal_wheel == *"cu121"* ]]; then
+    # if $normal_wheel matches cu121, do not upload the index.html
+    echo "Skipping index files for cu121 wheels"
 else
-    # only upload index.html for cu12 wheels (default wheels)
+    # only upload index.html for cu124 wheels (default wheels)
     aws s3 cp index.html "s3://vllm-wheels/$BUILDKITE_COMMIT/vllm/index.html"
     aws s3 cp "s3://vllm-wheels/nightly/index.html" "s3://vllm-wheels/$BUILDKITE_COMMIT/index.html"
 fi

@@ -63,8 +66,11 @@ aws s3 cp "$normal_wheel" "s3://vllm-wheels/nightly/"
 if [[ $normal_wheel == *"cu118"* ]]; then
     # if $normal_wheel matches cu118, do not upload the index.html
     echo "Skipping index files for cu118 wheels"
+elif [[ $normal_wheel == *"cu121"* ]]; then
+    # if $normal_wheel matches cu121, do not upload the index.html
+    echo "Skipping index files for cu121 wheels"
 else
-    # only upload index.html for cu12 wheels (default wheels)
+    # only upload index.html for cu124 wheels (default wheels)
     aws s3 cp index.html "s3://vllm-wheels/nightly/vllm/index.html"
 fi
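Both the per-commit and the nightly upload paths now treat cu118 and cu121 wheels as non-default and publish `index.html` only for the remaining (cu124) wheels. A Python rendering of that branch, with made-up wheel names, purely for illustration:

```python
# Python rendering of the bash branch above; the wheel names are made up.
def should_upload_index(wheel_name: str) -> bool:
    # index.html is uploaded only for the default (cu124) wheels;
    # cu118 and cu121 wheels are uploaded without index files.
    return "cu118" not in wheel_name and "cu121" not in wheel_name

assert not should_upload_index("vllm-0.0.0+cu118-cp38-abi3-linux_x86_64.whl")
assert not should_upload_index("vllm-0.0.0+cu121-cp38-abi3-linux_x86_64.whl")
assert should_upload_index("vllm-0.0.0-cp38-abi3-linux_x86_64.whl")
```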

docs/source/getting_started/installation/gpu/cuda.inc.md

Lines changed: 2 additions & 2 deletions
@@ -23,12 +23,12 @@ Therefore, it is recommended to install vLLM with a **fresh new** environment. I
 You can install vLLM using either `pip` or `uv pip`:

 ```console
-# Install vLLM with CUDA 12.1.
+# Install vLLM with CUDA 12.4.
 pip install vllm # If you are using pip.
 uv pip install vllm # If you are using uv.
 ```

-As of now, vLLM's binaries are compiled with CUDA 12.1 and public PyTorch release versions by default. We also provide vLLM binaries compiled with CUDA 11.8 and public PyTorch release versions:
+As of now, vLLM's binaries are compiled with CUDA 12.4 and public PyTorch release versions by default. We also provide vLLM binaries compiled with CUDA 12.1, 11.8, and public PyTorch release versions:

 ```console
 # Install vLLM with CUDA 11.8.
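To confirm which CUDA toolkit your installed PyTorch build targets (the same value `setup.py` reads via `torch.version.cuda` below), a quick check, assuming `torch` is installed:

```python
# Prints the CUDA version the installed PyTorch build was compiled
# against, e.g. "12.4" for the new default wheels (None on CPU-only builds).
import torch

print(torch.version.cuda)
```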

setup.py

Lines changed: 3 additions & 4 deletions
@@ -54,7 +54,7 @@ def load_module_from_path(module_name, path):
     # fallback to cpu
     VLLM_TARGET_DEVICE = "cpu"

-MAIN_CUDA_VERSION = "12.1"
+MAIN_CUDA_VERSION = "12.4"


 def is_sccache_available() -> bool:

@@ -571,9 +571,8 @@ def _read_requirements(filename: str) -> List[str]:
     cuda_major, cuda_minor = torch.version.cuda.split(".")
     modified_requirements = []
     for req in requirements:
-        if ("vllm-flash-attn" in req
-                and not (cuda_major == "12" and cuda_minor == "1")):
-            # vllm-flash-attn is built only for CUDA 12.1.
+        if ("vllm-flash-attn" in req and cuda_major != "12"):
+            # vllm-flash-attn is built only for CUDA 12.x.
             # Skip for other versions.
             continue
         modified_requirements.append(req)
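The relaxed condition keeps `vllm-flash-attn` for any CUDA 12.x build instead of exactly 12.1. A self-contained sketch of the loop, with made-up requirement strings:

```python
# Self-contained sketch of the filter above; the requirement strings and
# version pins are made up for illustration.
cuda_major, cuda_minor = "11", "8"  # e.g. from torch.version.cuda.split(".")

requirements = ["vllm-flash-attn==0.0.0", "ray>=2.9"]
modified_requirements = []
for req in requirements:
    if "vllm-flash-attn" in req and cuda_major != "12":
        # vllm-flash-attn wheels exist only for CUDA 12.x; skip otherwise.
        continue
    modified_requirements.append(req)

print(modified_requirements)  # ['ray>=2.9'] for a CUDA 11.8 build
```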
