Skip to content

Commit 20c5daf

Browse files
fix: install torch distribution matching container cuda version (#2027)
1 parent 4449f3d commit 20c5daf

File tree

2 files changed

+14
-5
lines changed

(duplicate of the file-change summary above removed — rendering artifact)

container/Dockerfile.vllm

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ ARG RELEASE_BUILD
1111
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
1212
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
1313
ARG VLLM_REF="059d4cd"
14+
ARG TORCH_BACKEND="cu128"
1415

1516
# After this commit deepgemm API changed
1617
# 1.0.0 -> 2.0.0
@@ -38,9 +39,10 @@ ARG ARCH_ALT=x86_64
3839

3940
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
4041

41-
# Redeclare ARCH and ARCH_ALT so they're available in this stage
42+
# Redeclare ARCH, ARCH_ALT, TORCH_BACKEND so they're available in this stage
4243
ARG ARCH
4344
ARG ARCH_ALT
45+
ARG TORCH_BACKEND
4446

4547
USER root
4648
ARG PYTHON_VERSION=3.12
@@ -192,7 +194,7 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
192194
--mount=type=cache,target=/root/.cache/uv \
193195
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
194196
chmod +x /tmp/install_vllm.sh && \
195-
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF
197+
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND
196198

197199
ENV LD_LIBRARY_PATH=\
198200
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\

container/deps/vllm/install_vllm.sh

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ INSTALLATION_DIR=/tmp
2626
ARCH=$(uname -m)
2727
DEEPGEMM_REF="6c9558e"
2828
FLASHINF_REF="1d72ed4"
29+
TORCH_BACKEND="cu128"
2930

3031
# Convert x86_64 to amd64 for consistency with Docker ARG
3132
if [ "$ARCH" = "x86_64" ]; then
@@ -68,8 +69,12 @@ while [[ $# -gt 0 ]]; do
6869
FLASHINF_REF="$2"
6970
shift 2
7071
;;
72+
--torch-backend)
73+
TORCH_BACKEND="$2"
74+
shift 2
75+
;;
7176
-h|--help)
72-
echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF]"
77+
echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND]"
7378
echo "Options:"
7479
echo " --editable Install vllm in editable mode (default)"
7580
echo " --no-editable Install vllm in non-editable mode"
@@ -79,6 +84,7 @@ while [[ $# -gt 0 ]]; do
7984
echo " --installation-dir DIR Directory to install vllm (default: /tmp/vllm)"
8085
echo " --deepgemm-ref REF Git reference for DeepGEMM (default: 6c9558e)"
8186
echo " --flashinf-ref REF Git reference for Flash Infer (default: 1d72ed4)"
87+
echo " --torch-backend BACKEND Torch backend to use (default: cu128)"
8288
exit 0
8389
;;
8490
*)
@@ -96,6 +102,7 @@ echo " EDITABLE: $EDITABLE"
96102
echo " VLLM_REF: $VLLM_REF"
97103
echo " MAX_JOBS: $MAX_JOBS"
98104
echo " ARCH: $ARCH"
105+
echo " TORCH_BACKEND: $TORCH_BACKEND"
99106

100107
# Install common dependencies
101108
uv pip install pip cuda-python
@@ -128,9 +135,9 @@ if [ "$ARCH" = "arm64" ]; then
128135
else
129136
echo "Installing vllm for AMD64 architecture"
130137
if [ "$EDITABLE" = "true" ]; then
131-
VLLM_USE_PRECOMPILED=1 uv pip install -e .
138+
VLLM_USE_PRECOMPILED=1 uv pip install -e . --torch-backend=$TORCH_BACKEND
132139
else
133-
VLLM_USE_PRECOMPILED=1 uv pip install .
140+
VLLM_USE_PRECOMPILED=1 uv pip install . --torch-backend=$TORCH_BACKEND
134141
fi
135142
fi
136143

0 commit comments

Comments (0)