Skip to content

Commit 7587ee1

Browse files
add install_vllm.sh script
1 parent a3b4691 commit 7587ee1

File tree

2 files changed

+129
-46
lines changed

2 files changed

+129
-46
lines changed

container/Dockerfile.vllm

Lines changed: 21 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
1010
ARG RELEASE_BUILD
1111
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
1212
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
13+
ARG VLLM_REF="059d4cd"
1314

1415
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
1516
# ARCH: Used for package suffixes (e.g., amd64, arm64)
@@ -167,52 +168,15 @@ RUN uv pip install /workspace/wheels/nixl/*.whl
167168

168169
# Install vllm - keep this early in Dockerfile to avoid
169170
# rebuilds from unrelated source code changes
170-
ARG VLLM_REF="059d4cd"
171+
ARG VLLM_REF
171172
ARG MAX_JOBS=16
172173
ENV MAX_JOBS=$MAX_JOBS
173174
ENV CUDA_HOME=/usr/local/cuda
174175
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
175176
--mount=type=cache,target=/root/.cache/uv \
176-
if [ "$ARCH" = "arm64" ]; then \
177-
uv pip install pip cuda-python && \
178-
mkdir /opt/vllm && \
179-
cd /opt/vllm && \
180-
git clone https://github.com/vllm-project/vllm.git && \
181-
cd vllm && \
182-
git checkout $VLLM_REF && \
183-
uv pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128 && \
184-
python use_existing_torch.py && \
185-
uv pip install -r requirements/build.txt && \
186-
MAX_JOBS=${MAX_JOBS} uv pip install --no-build-isolation -e . -v && \
187-
cd tools/ep_kernels && \
188-
bash install_python_libraries.sh && \
189-
cd ep_kernels_workspace && \
190-
git clone https://github.com/deepseek-ai/DeepGEMM.git && \
191-
cd DeepGEMM && \
192-
sed -i 's|git@github.com:|https://github.com/|g' .gitmodules && \
193-
git submodule sync --recursive && \
194-
git submodule update --init --recursive && \
195-
cat install.sh && \
196-
./install.sh; \
197-
else \
198-
uv pip install pip cuda-python && \
199-
mkdir /opt/vllm && \
200-
cd /opt/vllm && \
201-
git clone https://github.com/vllm-project/vllm.git && \
202-
cd vllm && \
203-
git checkout $VLLM_REF && \
204-
VLLM_USE_PRECOMPILED=1 uv pip install -e . && \
205-
cd tools/ep_kernels && \
206-
bash install_python_libraries.sh && \
207-
cd ep_kernels_workspace && \
208-
git clone https://github.com/deepseek-ai/DeepGEMM.git && \
209-
cd DeepGEMM && \
210-
sed -i 's|git@github.com:|https://github.com/|g' .gitmodules && \
211-
git submodule sync --recursive && \
212-
git submodule update --init --recursive && \
213-
cat install.sh && \
214-
./install.sh; \
215-
fi
177+
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
178+
chmod +x /tmp/install_vllm.sh && \
179+
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH
216180

217181
# Common dependencies
218182
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
@@ -489,12 +453,23 @@ ARG ARCH_ALT
489453
ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins
490454
ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu:/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins:/usr/local/ucx/lib:$LD_LIBRARY_PATH
491455

492-
# Setup the python environment
456+
# Copy the virtual environment from base stage (includes vllm and all dependencies)
493457
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
494-
RUN uv venv $VIRTUAL_ENV --python 3.12 && \
495-
echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
458+
COPY --from=base /opt/dynamo/venv /opt/dynamo/venv
459+
RUN echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
496460

497-
# Common dependencies
461+
# Install vllm in non-editable mode for runtime
462+
ARG VLLM_REF
463+
ARG MAX_JOBS=16
464+
ENV MAX_JOBS=$MAX_JOBS
465+
ENV CUDA_HOME=/usr/local/cuda
466+
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
467+
--mount=type=cache,target=/root/.cache/uv \
468+
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
469+
chmod +x /tmp/install_vllm.sh && \
470+
/tmp/install_vllm.sh --no-editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS
471+
472+
# Common dependencies - these may already be installed in the copied venv, but run anyway for safety
498473
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
499474
uv pip install --requirement /tmp/requirements.txt
500475

@@ -512,7 +487,7 @@ RUN uv pip install /workspace/benchmarks
512487
#Copy NIXL and Dynamo wheels into wheelhouse
513488
COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/
514489
COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
515-
RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse && \
490+
RUN uv pip install ai-dynamo --find-links wheelhouse && \
516491
uv pip install nixl --find-links wheelhouse && \
517492
ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
518493
rm -r wheelhouse
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
#!/bin/bash
# install_vllm.sh - Clone vllm at a pinned ref and install it (plus the
# ep_kernels helpers and DeepGEMM) into the active uv-managed environment.
#
# Usage: install_vllm.sh [--editable|--no-editable] [--vllm-ref REF]
#                        [--max-jobs NUM] [--arch ARCH]
#
# Intended to run inside the container build (expects `uv`, `git`, and CUDA
# under /usr/local/cuda to be present).
set -euo pipefail

# Defaults; each is overridable via the CLI flags parsed below.
EDITABLE=true
VLLM_REF="059d4cd"
MAX_JOBS=16
ARCH=$(uname -m)

# Normalize `uname -m` output to the Docker ARG convention (amd64/arm64).
if [ "$ARCH" = "x86_64" ]; then
    ARCH="amd64"
elif [ "$ARCH" = "aarch64" ]; then
    ARCH="arm64"
fi

# Fail fast (to stderr) when a value-taking flag is missing its argument,
# instead of letting `shift 2` silently eat the next flag.
require_value() {
    if [ -z "${2:-}" ]; then
        echo "Option $1 requires an argument" >&2
        exit 1
    fi
}

while [ $# -gt 0 ]; do
    case "$1" in
        --editable)
            EDITABLE=true
            shift
            ;;
        --no-editable)
            EDITABLE=false
            shift
            ;;
        --vllm-ref)
            require_value "$1" "${2:-}"
            VLLM_REF="$2"
            shift 2
            ;;
        --max-jobs)
            require_value "$1" "${2:-}"
            MAX_JOBS="$2"
            shift 2
            ;;
        --arch)
            require_value "$1" "${2:-}"
            ARCH="$2"
            shift 2
            ;;
        -h|--help)
            echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH]"
            echo "Options:"
            echo "  --editable        Install vllm in editable mode (default)"
            echo "  --no-editable     Install vllm in non-editable mode"
            echo "  --vllm-ref REF    Git reference to checkout (default: 059d4cd)"
            echo "  --max-jobs NUM    Maximum number of parallel jobs (default: 16)"
            echo "  --arch ARCH       Architecture (amd64|arm64, default: auto-detect)"
            exit 0
            ;;
        *)
            echo "Unknown option: $1" >&2
            exit 1
            ;;
    esac
done

# Reject unknown architectures up front; previously anything that was not
# arm64 silently took the amd64 path, hiding typos like "x86".
if [ "$ARCH" != "amd64" ] && [ "$ARCH" != "arm64" ]; then
    echo "Unsupported --arch '$ARCH' (expected amd64 or arm64)" >&2
    exit 1
fi

# MAX_JOBS is read by vllm's build; CUDA_HOME by the CUDA toolchain.
export MAX_JOBS
export CUDA_HOME=/usr/local/cuda

echo "Installing vllm with the following configuration:"
echo "  EDITABLE: $EDITABLE"
echo "  VLLM_REF: $VLLM_REF"
echo "  MAX_JOBS: $MAX_JOBS"
echo "  ARCH: $ARCH"

# Install common dependencies
uv pip install pip cuda-python

# Create vllm directory and clone. -p keeps a re-run (e.g. resumed build
# layer) from failing on an existing directory.
mkdir -p /opt/vllm
cd /opt/vllm
git clone https://github.com/vllm-project/vllm.git
cd vllm
git checkout "$VLLM_REF"

if [ "$ARCH" = "arm64" ]; then
    # ARM64 has no precompiled vllm wheels: pull nightly CUDA torch, reuse
    # it for the build, and compile vllm from source.
    echo "Installing vllm for ARM64 architecture"
    uv pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128
    python use_existing_torch.py
    uv pip install -r requirements/build.txt

    if [ "$EDITABLE" = "true" ]; then
        uv pip install --no-build-isolation -e . -v
    else
        uv pip install --no-build-isolation . -v
    fi
else
    # AMD64 can reuse upstream's precompiled kernels, skipping compilation.
    echo "Installing vllm for AMD64 architecture"
    if [ "$EDITABLE" = "true" ]; then
        VLLM_USE_PRECOMPILED=1 uv pip install -e .
    else
        VLLM_USE_PRECOMPILED=1 uv pip install .
    fi
fi

# Install ep_kernels and DeepGEMM
echo "Installing ep_kernels and DeepGEMM"
cd tools/ep_kernels
bash install_python_libraries.sh
cd ep_kernels_workspace
git clone https://github.com/deepseek-ai/DeepGEMM.git
cd DeepGEMM
# DeepGEMM's submodules use SSH URLs; rewrite to HTTPS so the clone works
# without deploy keys inside the build container.
sed -i 's|git@github.com:|https://github.com/|g' .gitmodules
git submodule sync --recursive
git submodule update --init --recursive
# Echo the install script into the build log before running it, for
# debuggability of pinned-upstream changes.
cat install.sh
./install.sh

echo "vllm installation completed successfully"

0 commit comments

Comments
 (0)