#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# This script installs vLLM and its dependencies.
# If vLLM is installed from a release tag, pip manages the install;
# otherwise the vLLM source is checked out with git and built from source.
# The dependencies are installed in the following order:
# 1. vLLM
# 2. LMCache
# 3. DeepGEMM
# 4. EP kernels

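# Example invocations (illustrative only; assumes this script is saved as install_vllm.sh):
#   ./install_vllm.sh                                  # install the default release (v0.10.2) via pip
#   ./install_vllm.sh --vllm-ref <commit-sha>          # build vLLM from source at a specific commit
#   ./install_vllm.sh --max-jobs 8 --torch-backend cu128 --torch-cuda-arch-list "9.0;10.0"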
set -euo pipefail
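# -e: exit on the first failing command, -u: treat unset variables as errors, -o pipefail: a pipeline fails if any stage fails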
VLLM_REF="v0.10.2"

# Basic Configurations
ARCH=$(uname -m)
MAX_JOBS=16
INSTALLATION_DIR=/tmp

# VLLM and Dependency Configurations
TORCH_BACKEND="cu128"
TORCH_CUDA_ARCH_LIST="9.0;10.0"  # For EP Kernels
DEEPGEMM_REF=""
CUDA_VERSION="12.8"  # For DeepGEMM

# These flags are applicable when installing vLLM from source code
EDITABLE=true
VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
FLASHINF_REF="v0.3.0"

while [[ $# -gt 0 ]]; do
    case $1 in
        --editable)
            EDITABLE=true
            shift
            ;;
        --no-editable)
            EDITABLE=false
            shift
            ;;
        --vllm-ref)
            VLLM_REF="$2"
            shift 2
            ;;
        --max-jobs)
            MAX_JOBS="$2"
            shift 2
            ;;
        --arch)
            ARCH="$2"
            shift 2
            ;;
        --deepgemm-ref)
            DEEPGEMM_REF="$2"
            shift 2
            ;;
        --flashinf-ref)
            FLASHINF_REF="$2"
            shift 2
            ;;
        --torch-backend)
            TORCH_BACKEND="$2"
            shift 2
            ;;
        --torch-cuda-arch-list)
            TORCH_CUDA_ARCH_LIST="$2"
            shift 2
            ;;
        --cuda-version)
            CUDA_VERSION="$2"
            shift 2
            ;;
        -h|--help)
            echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND] [--torch-cuda-arch-list LIST] [--cuda-version VERSION]"
            echo "Options:"
            echo "  --editable                Install vllm in editable mode (default)"
            echo "  --no-editable             Install vllm in non-editable mode"
            echo "  --vllm-ref REF            Git reference to checkout (default: ${VLLM_REF})"
            echo "  --max-jobs NUM            Maximum number of parallel build jobs (default: ${MAX_JOBS})"
            echo "  --arch ARCH               Target architecture, amd64 or arm64 (default: auto-detect)"
            echo "  --deepgemm-ref REF        Git reference for DeepGEMM (default: ${DEEPGEMM_REF})"
            echo "  --flashinf-ref REF        Git reference for Flash Infer (default: ${FLASHINF_REF})"
            echo "  --torch-backend BACKEND   Torch backend to use (default: ${TORCH_BACKEND})"
            echo "  --torch-cuda-arch-list LIST  CUDA architectures to compile for (default: ${TORCH_CUDA_ARCH_LIST})"
            echo "  --cuda-version VERSION    CUDA version to use (default: ${CUDA_VERSION})"
            exit 0
            ;;
        *)
            echo "Unknown option: $1"
            exit 1
            ;;
    esac
done

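# Note: `uname -m` reports x86_64 / aarch64, while Docker's TARGETARCH convention uses amd64 / arm64.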
# Convert x86_64 to amd64 for consistency with Docker ARG
if [ "$ARCH" = "x86_64" ]; then
    ARCH="amd64"
elif [ "$ARCH" = "aarch64" ]; then
    ARCH="arm64"
fi

export MAX_JOBS=$MAX_JOBS
export CUDA_HOME=/usr/local/cuda

echo "=== Installing prerequisites ==="
uv pip install pip cuda-python

echo "\n=== Configuration Summary ==="
echo "VLLM_REF=$VLLM_REF | EDITABLE=$EDITABLE | ARCH=$ARCH"
echo "MAX_JOBS=$MAX_JOBS | TORCH_BACKEND=$TORCH_BACKEND | CUDA_VERSION=$CUDA_VERSION"
echo "TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST"
echo "DEEPGEMM_REF=$DEEPGEMM_REF | FLASHINF_REF=$FLASHINF_REF"
echo "INSTALLATION_DIR=$INSTALLATION_DIR | VLLM_GIT_URL=$VLLM_GIT_URL"

echo "\n=== Cloning vLLM repository ==="
cd $INSTALLATION_DIR
git clone $VLLM_GIT_URL vllm
cd vllm
git checkout $VLLM_REF

echo "\n=== Installing vLLM ==="

if [[ $VLLM_REF =~ ^v ]]; then
    # VLLM_REF starts with 'v' - use pip install with version tag
    echo "Installing vLLM $VLLM_REF from PyPI..."
    uv pip install vllm[flashinfer]==$VLLM_REF --torch-backend=$TORCH_BACKEND
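    # With the defaults above, this resolves to roughly (illustrative):
    #   uv pip install "vllm[flashinfer]==v0.10.2" --torch-backend=cu128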
else
    # VLLM_REF does not start with 'v' - use git checkout path
    if [ "$ARCH" = "arm64" ]; then
        echo "Building vLLM from source for ARM64 architecture..."

        # Install pinned PyTorch builds; a fallback to the latest nightly is left commented out below
        echo "Attempting to install pinned PyTorch versions..."
        if ! uv pip install torch==2.7.1+cu128 torchaudio==2.7.1 torchvision==0.22.1 --index-url https://download.pytorch.org/whl; then
            echo "Pinned versions failed"
            exit 1
            # uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
        fi

        python use_existing_torch.py
        uv pip install -r requirements/build.txt

        if [ "$EDITABLE" = "true" ]; then
            MAX_JOBS=${MAX_JOBS} uv pip install --no-build-isolation -e . -v
        else
            MAX_JOBS=${MAX_JOBS} uv pip install --no-build-isolation . -v
        fi

        echo "\n=== Installing FlashInfer from source ==="
        cd $INSTALLATION_DIR
        git clone https://github.com/flashinfer-ai/flashinfer.git --recursive
        cd flashinfer
        git checkout $FLASHINF_REF
        uv pip install -v --no-build-isolation .

    else
        echo "Building vLLM from source for AMD64 architecture..."

        # When updating VLLM_REF above, make sure the precompiled wheel URL below is still correct. To check, run:
        #   aws s3 ls s3://vllm-wheels/${VLLM_REF}/ --region us-west-2 --no-sign-request
        export VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.2-cp38-abi3-manylinux1_x86_64.whl"
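        # Illustrative sanity check (not part of the upstream flow): confirm the wheel URL is reachable
        #   curl -sI "$VLLM_PRECOMPILED_WHEEL_LOCATION" | head -n 1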

        if [ "$EDITABLE" = "true" ]; then
            uv pip install -e . --torch-backend=$TORCH_BACKEND
        else
            uv pip install . --torch-backend=$TORCH_BACKEND
        fi

        echo "\n=== Installing FlashInfer from PyPI ==="
        uv pip install flashinfer-python==$FLASHINF_REF

    fi
fi
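# Optional sanity check (illustrative; not part of the upstream script):
#   python -c "import vllm; print(vllm.__version__)"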

echo "✓ vLLM installation completed"

echo "\n=== Installing LMCache ==="
if [ "$ARCH" = "amd64" ]; then
    # LMCache installation currently fails on arm64 due to CUDA dependency issues:
    #   OSError: CUDA_HOME environment variable is not set. Please set it to your CUDA install root.
    # TODO: Re-enable for arm64 after verifying lmcache compatibility and resolving the build issue.
    uv pip install lmcache==0.3.3
    echo "✓ LMCache installed"
else
    echo "⚠ Skipping LMCache on ARM64 (compatibility issues)"
fi
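# Illustrative check (assumption; not part of the upstream script):
#   python -c "import lmcache"   # expected to succeed on amd64 after the install above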

echo "\n=== Installing DeepGEMM ==="
cd tools/
if [ -n "$DEEPGEMM_REF" ]; then
    bash install_deepgemm.sh --cuda-version "${CUDA_VERSION}" --ref "$DEEPGEMM_REF"
else
    bash install_deepgemm.sh --cuda-version "${CUDA_VERSION}"
fi
echo "✓ DeepGEMM installation completed"

echo "\n=== Installing EP Kernels (PPLX and DeepEP) ==="
cd ep_kernels/
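# TORCH_CUDA_ARCH_LIST="9.0;10.0" (the default) targets Hopper (e.g. H100) and Blackwell (e.g. B200);
# pass --torch-cuda-arch-list to build the EP kernels for other GPU compute capabilities.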
TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries.sh

echo "\n✅ All installations completed successfully!"