@@ -10,16 +10,15 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
1010ARG RELEASE_BUILD
1111ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
1212ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
13- ARG VLLM_REF="059d4cd"
14- ARG TORCH_BACKEND="cu128"
15-
16- # After this commit deepgemm API changed
17- # 1.0.0 -> 2.0.0
18- ARG DEEPGEMM_REF="03d0be3"
19- ARG FLASHINF_REF="1d72ed4"
2013
2114# Make sure to update the dependency version in pyproject.toml when updating this
22- ARG VLLM_VERSION="0.9.2"
15+ ARG VLLM_REF="v0.10.0"
16+ ARG TORCH_BACKEND="cu128"
17+
18+ # Match 0.10.0 vLLM release
19+ # https://github.com/vllm-project/vllm/releases/tag/v0.10.0
20+ ARG DEEPGEMM_REF="1876566"
21+ ARG FLASHINF_REF="v0.2.8rc1"
2322
2423# Define general architecture ARGs for supporting both x86 and aarch64 builds.
2524# ARCH: Used for package suffixes (e.g., amd64, arm64)
@@ -42,11 +41,10 @@ ARG ARCH_ALT=x86_64
4241
4342FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
4443
45- # Redeclare ARCH, ARCH_ALT, TORCH_BACKEND, VLLM_VERSION so they're available in this stage
44+ # Redeclare ARCH, ARCH_ALT, TORCH_BACKEND so they're available in this stage
4645ARG ARCH
4746ARG ARCH_ALT
4847ARG TORCH_BACKEND
49- ARG VLLM_VERSION
5048
5149USER root
5250ARG PYTHON_VERSION=3.12
@@ -195,15 +193,11 @@ ENV CUDA_HOME=/usr/local/cuda
195193
196194RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
197195 --mount=type=cache,target=/root/.cache/uv \
198- if [ "$ARCH" = "arm64" ]; then \
199196 # TODO - split vllm, DeepEP, DeepGeMM, PPLX installs
200197 # Should be able to select how you want your build to go
201198 cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
202199 chmod +x /tmp/install_vllm.sh && \
203- /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND; \
204- else \
205- uv pip install "vllm==${VLLM_VERSION}"; \
206- fi
200+ /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND;
207201
208202ENV LD_LIBRARY_PATH=\
209203/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
@@ -464,9 +458,7 @@ COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
464458COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
465459
466460# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries
467- RUN if [ "$ARCH" = "arm64" ]; then \
468- COPY --from=base /opt/vllm /opt/vllm; \
469- fi
461+ COPY --from=base /opt/vllm /opt/vllm
470462
471463ENV LD_LIBRARY_PATH=\
472464/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
0 commit comments