@@ -26,6 +26,7 @@ INSTALLATION_DIR=/tmp
2626ARCH=$(uname -m)
2727DEEPGEMM_REF="6c9558e"
2828FLASHINF_REF="1d72ed4"
29+ TORCH_BACKEND="cu128"
2930
3031# Convert x86_64 to amd64 for consistency with Docker ARG
3132if [ "$ARCH" = "x86_64" ]; then
@@ -68,8 +69,12 @@ while [[ $# -gt 0 ]]; do
6869 FLASHINF_REF="$2"
6970 shift 2
7071 ;;
72+ --torch-backend)
73+ TORCH_BACKEND="$2"
74+ shift 2
75+ ;;
7176 -h|--help)
72- echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF]"
77+ echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND]"
7378 echo " Options:"
7479 echo " --editable Install vllm in editable mode (default)"
7580 echo " --no-editable Install vllm in non-editable mode"
@@ -79,6 +84,7 @@ while [[ $# -gt 0 ]]; do
7984 echo " --installation-dir DIR Directory to install vllm (default: /tmp/vllm)"
8085 echo " --deepgemm-ref REF Git reference for DeepGEMM (default: 6c9558e)"
8186 echo " --flashinf-ref REF Git reference for Flash Infer (default: 1d72ed4)"
87+ echo " --torch-backend BACKEND Torch backend to use (default: cu128)"
8288 exit 0
8389 ;;
8490 * )
@@ -96,6 +102,7 @@ echo "EDITABLE: $EDITABLE"
96102echo "VLLM_REF: $VLLM_REF"
97103echo "MAX_JOBS: $MAX_JOBS"
98104echo "ARCH: $ARCH"
105+ echo "TORCH_BACKEND: $TORCH_BACKEND"
99106
100107# Install common dependencies
101108uv pip install pip cuda-python
@@ -128,9 +135,9 @@ if [ "$ARCH" = "arm64" ]; then
128135else
129136 echo " Installing vllm for AMD64 architecture"
130137 if [ "$EDITABLE" = "true" ]; then
131- VLLM_USE_PRECOMPILED=1 uv pip install -e .
138+ VLLM_USE_PRECOMPILED=1 uv pip install -e . --torch-backend=$TORCH_BACKEND
132139 else
133- VLLM_USE_PRECOMPILED=1 uv pip install .
140+ VLLM_USE_PRECOMPILED=1 uv pip install . --torch-backend=$TORCH_BACKEND
134141 fi
135142fi
136143
0 commit comments