@@ -10,6 +10,12 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
 ARG RELEASE_BUILD
 ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
 ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
+ARG VLLM_REF="059d4cd"
+
+# After this commit deepgemm API changed
+# 1.0.0 -> 2.0.0
+ARG DEEPGEMM_REF="03d0be3"
+ARG FLASHINF_REF="1d72ed4"
 
 # Define general architecture ARGs for supporting both x86 and aarch64 builds.
 # ARCH: Used for package suffixes (e.g., amd64, arm64)
@@ -40,7 +46,7 @@ USER root
 ARG PYTHON_VERSION=3.12
 
 RUN apt-get update -y && \
-    apt-get install -y \
+    apt-get install -y --no-install-recommends \
     # NIXL build dependencies
    cmake \
     meson \
@@ -50,31 +56,36 @@ RUN apt-get update -y && \
     clang \
     libclang-dev \
     git \
+    build-essential \
+    protobuf-compiler \
+    libssl-dev \
+    pkg-config \
     # Install utilities
     nvtop \
     tmux \
     vim \
     autoconf \
+    automake \
     libtool \
-    net-tools
-
-# These headers are missing with the hpcx installer, required
-# by UCX to find RDMA devices
-RUN apt-get update -y && \
-    apt-get install -y --no-install-recommends \
-    --reinstall libibverbs-dev rdma-core ibverbs-utils libibumad-dev \
-    libnuma-dev librdmacm-dev ibverbs-providers
+    net-tools \
+    # These headers are missing with the hpcx installer, required
+    # by UCX to find RDMA devices
+    libibverbs-dev rdma-core ibverbs-utils libibumad-dev \
+    libnuma-dev librdmacm-dev ibverbs-providers \
+    # For Prometheus
+    curl tar ca-certificates && \
+    rm -rf /var/lib/apt/lists/*
 
 ARG NIXL_UCX_REF=v1.19.x
 ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
 
 WORKDIR /workspace
 
 ### UCX EFA Setup ###
-RUN rm -rf /opt/hpcx/ucx
-RUN rm -rf /usr/local/ucx
-RUN echo "Building UCX with reference $NIXL_UCX_REF"
-RUN cd /usr/local/src && \
+RUN rm -rf /opt/hpcx/ucx && \
+    rm -rf /usr/local/ucx && \
+    echo "Building UCX with reference $NIXL_UCX_REF" && \
+    cd /usr/local/src && \
     git clone https://github.com/openucx/ucx.git && \
     cd ucx && \
     git checkout $NIXL_UCX_REF && \
@@ -96,7 +107,10 @@ RUN cd /usr/local/src && \
     make -j install-strip && \
     ldconfig
 
-ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:$LD_LIBRARY_PATH
+ENV LD_LIBRARY_PATH=\
+/usr/lib:/usr/local/ucx/lib:\
+/usr/local/ucx/lib/ucx:\
+$LD_LIBRARY_PATH
 ENV CPATH=/usr/include
 ENV PATH=/usr/bin:$PATH
 ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig
@@ -109,8 +123,8 @@ WORKDIR /workspace
 # TEMP: disable gds backend for arm64
 RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
     cd /opt/nixl && \
-    git checkout ${NIXL_REF}
-RUN if [ "$ARCH" = "arm64" ]; then \
+    git checkout ${NIXL_REF} && \
+    if [ "$ARCH" = "arm64" ]; then \
     cd /opt/nixl && \
     mkdir build && \
     meson setup build/ --buildtype=release --prefix=/usr/local/nixl -Ddisable_gds_backend=true -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
@@ -127,12 +141,10 @@ RUN if [ "$ARCH" = "arm64" ]; then \
     fi
 
 ### NATS & ETCD SETUP ###
-# nats
-RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.28/nats-server-v2.10.28-${ARCH}.deb && \
-    dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb
-# etcd
 ENV ETCD_VERSION="v3.5.21"
-RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
+RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.28/nats-server-v2.10.28-${ARCH}.deb && \
+    dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb && \
+    wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \
     mkdir -p /usr/local/bin/etcd && \
     tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \
     rm /tmp/etcd.tar.gz
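
With the NATS and etcd installs now collapsed into a single RUN layer, a quick smoke test that both binaries landed where later stages expect them might look like the snippet below; the image tag `dynamo-base:dev` is an assumption for illustration, not something defined in this diff.

```bash
# Hypothetical tag for an image built from this stage; substitute your own.
docker run --rm dynamo-base:dev bash -lc 'nats-server --version && etcd --version && which etcd'
```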
@@ -142,12 +154,12 @@ ENV PATH=/usr/local/bin/etcd/:$PATH
 ### VIRTUAL ENVIRONMENT SETUP ###
 
 # Install uv and create virtualenv
+ENV VIRTUAL_ENV=/opt/dynamo/venv
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
 RUN mkdir /opt/dynamo && \
-    uv venv /opt/dynamo/venv --python 3.12
+    uv venv ${VIRTUAL_ENV} --python 3.12
 
 # Activate virtual environment
-ENV VIRTUAL_ENV=/opt/dynamo/venv
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
 
 # Install NIXL Python module
@@ -159,86 +171,47 @@ RUN if [ "$ARCH" = "arm64" ]; then \
         --config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
     else \
         cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl; \
-    fi
-
-# Install the wheel
-# TODO: Move NIXL wheel install to the wheel_builder stage
-RUN uv pip install /workspace/wheels/nixl/*.whl
+    fi && \
+    # Install the wheel
+    # TODO: Move NIXL wheel install to the wheel_builder stage
+    uv pip install /workspace/wheels/nixl/*.whl
 
 # Install vllm - keep this early in Dockerfile to avoid
 # rebuilds from unrelated source code changes
-# [gluo NOTE] currently using a fork of vllm until the fix
-# for multi-modal disaggregated serving is merged upstream.
-# see https://github.com/vllm-project/vllm/pull/21074
-ARG VLLM_REPO=https://github.com/GuanLuo/vllm.git
-ARG VLLM_REF="eaadf838ebe93e29a38a6fc1bab5a9801abe7d2c"
+ARG VLLM_REF
+ARG DEEPGEMM_REF
+ARG FLASHINF_REF
+
 ARG MAX_JOBS=16
 ENV MAX_JOBS=$MAX_JOBS
 ENV CUDA_HOME=/usr/local/cuda
+
+# TODO - split vllm, DeepEP, DeepGeMM, PPLX installs
+# Should be able to select how you want your build to go
 RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
     --mount=type=cache,target=/root/.cache/uv \
-    if [ "$ARCH" = "arm64" ]; then \
-        uv pip install pip cuda-python && \
-        mkdir /opt/vllm && \
-        cd /opt/vllm && \
-        git clone $VLLM_REPO && \
-        cd vllm && \
-        git checkout $VLLM_REF && \
-        uv pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128 && \
-        python use_existing_torch.py && \
-        uv pip install -r requirements/build.txt && \
-        MAX_JOBS=${MAX_JOBS} uv pip install --no-build-isolation -e . -v && \
-        cd tools/ep_kernels && \
-        bash install_python_libraries.sh && \
-        cd ep_kernels_workspace && \
-        git clone https://github.com/deepseek-ai/DeepGEMM.git && \
-        cd DeepGEMM && \
-        sed -i 's|git@github.com:|https://github.com/|g' .gitmodules && \
-        git submodule sync --recursive && \
-        git submodule update --init --recursive && \
-        cat install.sh && \
-        ./install.sh; \
-    else \
-        uv pip install pip cuda-python && \
-        mkdir /opt/vllm && \
-        cd /opt/vllm && \
-        git clone $VLLM_REPO && \
-        cd vllm && \
-        git checkout $VLLM_REF && \
-        VLLM_USE_PRECOMPILED=1 uv pip install -e . && \
-        cd tools/ep_kernels && \
-        bash install_python_libraries.sh && \
-        cd ep_kernels_workspace && \
-        git clone https://github.com/deepseek-ai/DeepGEMM.git && \
-        cd DeepGEMM && \
-        sed -i 's|git@github.com:|https://github.com/|g' .gitmodules && \
-        git submodule sync --recursive && \
-        git submodule update --init --recursive && \
-        cat install.sh && \
-        ./install.sh; \
-    fi
+    cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
+    chmod +x /tmp/install_vllm.sh && \
+    /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF
+
+ENV LD_LIBRARY_PATH=\
+/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
+$LD_LIBRARY_PATH
 
 # Common dependencies
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
     uv pip install --requirement /tmp/requirements.txt
 
+### MISC UTILITY SETUP ###
+
 # Install test dependencies
 RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
-    uv pip install --requirement /tmp/requirements.txt
-
-# ### MISC UTILITY SETUP ###
-
-# Finish pyright install
-RUN pyright --help > /dev/null 2>&1
-
-# Enable Git operations in the /workspace directory
-RUN printf "[safe]\n directory=/workspace\n" > /root/.gitconfig
+    uv pip install --requirement /tmp/requirements.txt && \
+    pyright --help > /dev/null 2>&1 && \
+    printf "[safe]\n directory=/workspace\n" > /root/.gitconfig
 
 # Install prometheus
 ARG PROM_VERSION=3.4.1
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    curl tar ca-certificates && \
-    rm -rf /var/lib/apt/lists/*
 RUN ARCH=$(dpkg --print-architecture) && \
     case "$ARCH" in \
         amd64) PLATFORM=linux-amd64 ;; \
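
The vLLM/DeepGEMM/FlashInfer build logic now lives in `container/deps/vllm/install_vllm.sh` instead of an inline `if/else` block, and the Dockerfile only passes refs through. The script itself is not shown in this diff; the sketch below is purely illustrative of the flag interface the call site above assumes (flag names are taken from the invocation, the parsing and defaults are guesses).

```bash
#!/usr/bin/env bash
# Illustrative sketch only -- not the real container/deps/vllm/install_vllm.sh.
set -euo pipefail

EDITABLE=false
VLLM_REF=""
MAX_JOBS=16
ARCH="amd64"
INSTALL_DIR="/opt"
DEEPGEMM_REF=""
FLASHINF_REF=""

# Parse the long options used at the call site in the Dockerfile.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --editable)         EDITABLE=true; shift ;;
        --vllm-ref)         VLLM_REF="$2"; shift 2 ;;
        --max-jobs)         MAX_JOBS="$2"; shift 2 ;;
        --arch)             ARCH="$2"; shift 2 ;;
        --installation-dir) INSTALL_DIR="$2"; shift 2 ;;
        --deepgemm-ref)     DEEPGEMM_REF="$2"; shift 2 ;;
        --flashinf-ref)     FLASHINF_REF="$2"; shift 2 ;;
        *) echo "unknown flag: $1" >&2; exit 1 ;;
    esac
done

echo "vllm=$VLLM_REF deepgemm=$DEEPGEMM_REF flashinfer=$FLASHINF_REF" \
     "editable=$EDITABLE jobs=$MAX_JOBS arch=$ARCH dir=$INSTALL_DIR"
```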
@@ -253,15 +226,6 @@ RUN ARCH=$(dpkg --print-architecture) && \
 
 ### BUILDS ###
 
-# Rust build/dev dependencies
-RUN apt update -y && \
-    apt install --no-install-recommends -y \
-    build-essential \
-    protobuf-compiler \
-    cmake \
-    libssl-dev \
-    pkg-config
-
 ENV RUSTUP_HOME=/usr/local/rustup \
     CARGO_HOME=/usr/local/cargo \
     PATH=/usr/local/cargo/bin:$PATH \
@@ -309,8 +273,8 @@ RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
 
 # This is a slow operation (~40s on my cpu)
 # Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
-COPY --from=base --chown=$USER_UID:$USER_GID /opt/dynamo/venv/ /opt/dynamo/venv/
-RUN chown $USERNAME:$USERNAME /opt/dynamo/venv
+COPY --from=base --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV} ${VIRTUAL_ENV}
+RUN chown $USERNAME:$USERNAME ${VIRTUAL_ENV}
 COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
 
 # so we can use maturin develop
@@ -365,6 +329,7 @@ ENV RUSTUP_HOME=/usr/local/rustup \
 
 COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
 COPY --from=base $CARGO_HOME $CARGO_HOME
+# NIXL path default is NIXL_PREFIX=/opt/nvidia/nvda_nixl
 COPY --from=base /usr/local/nixl /opt/nvidia/nvda_nixl
 COPY --from=base /workspace /workspace
 COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV
@@ -414,6 +379,11 @@ WORKDIR /workspace
 
 COPY --from=wheel_builder /workspace /workspace
 COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
+ARG ARCH_ALT
+ENV LD_LIBRARY_PATH=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu:\
+/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugin:\
+$LD_LIBRARY_PATH
+
 # Copy Cargo cache to avoid re-downloading dependencies
 COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
 
@@ -447,8 +417,6 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
     sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
     echo "cat ~/.launch_screen" >> ~/.bashrc
 
-ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
-
 ########################################
 ########## Development Image ###########
 ########################################
@@ -473,7 +441,11 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
 RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
     build-essential \
-    python3-dev && \
+    python3-dev \
+    # JIT Kernel Compilation, flashinfer
+    ninja-build \
+    g++ \
+    cuda-toolkit-12-8 && \
     rm -rf /var/lib/apt/lists/*
 
 ### COPY BINDINGS ###
@@ -486,45 +458,41 @@ COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/
 ENV PATH=/usr/local/bin/etcd/:$PATH
 
 # Copy UCX from base image as plugin for NIXL
-# Copy NIXL source from base image (required for NIXL plugins)
+# Copy NIXL source from wheel_builder image
 COPY --from=base /usr/local/ucx /usr/local/ucx
-COPY --from=base /usr/local/nixl /usr/local/nixl
-ARG ARCH_ALT
-ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins
-ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu:/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins:/usr/local/ucx/lib:$LD_LIBRARY_PATH
-
-# Setup the python environment
-COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
-RUN uv venv $VIRTUAL_ENV --python 3.12 && \
-    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
-
-# Common dependencies
-RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
-    uv pip install --requirement /tmp/requirements.txt
-
-# Install test dependencies
-#TODO: Remove this once we have a functional ci_minimum image built on top of the runtime image
-RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
-    uv pip install --requirement /tmp/requirements.txt
-
-#TODO: Remove this once we have a functional ci_minimum image built on top of the runtime image
-COPY . /workspace
-RUN uv pip install /workspace/benchmarks
+COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
 
-# Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
-# Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
-#Copy NIXL and Dynamo wheels into wheelhouse
-COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/
-COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
-RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse && \
-    uv pip install nixl --find-links wheelhouse && \
-    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
-    rm -r wheelhouse
+# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries
+COPY --from=base /opt/vllm /opt/vllm
+ARG ARCH_ALT
+ENV LD_LIBRARY_PATH=\
+/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
+/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu:\
+/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugin:\
+/usr/local/ucx/lib:\
+/usr/local/ucx/lib/ucx:\
+$LD_LIBRARY_PATH
+
+
+# Copy the entire venv
+# There's a lot of stuff we'd have to re-compile otherwise,
+# so it's better to just copy it
+COPY --from=ci_minimum ${VIRTUAL_ENV} ${VIRTUAL_ENV}
+
+# Once the UX refactor is merged, the Python components
+# will be pip-installed and packaged in a wheel,
+# and these files can be removed
+COPY components/ /workspace/components/
+COPY tests/ /workspace/tests/
+COPY examples/ /workspace/examples/
+COPY deploy/ /workspace/deploy/
+COPY benchmarks/ /workspace/benchmarks/
 
 # Copy launch banner
 RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
     sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
-    echo "cat ~/.launch_screen" >> ~/.bashrc
+    echo "cat ~/.launch_screen" >> ~/.bashrc && \
+    echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc
 
 ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
 CMD []
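
Since VLLM_REF, DEEPGEMM_REF, and FLASHINF_REF are now plain top-level build args, they can be overridden at build time without editing the Dockerfile. A hedged example invocation follows; the Dockerfile path and image tag are assumptions, only the `--build-arg` names come from this diff.

```bash
# Build with the pinned refs (override any of them as needed).
docker build \
    -f container/Dockerfile.vllm \
    --build-arg VLLM_REF=059d4cd \
    --build-arg DEEPGEMM_REF=03d0be3 \
    --build-arg FLASHINF_REF=1d72ed4 \
    -t dynamo:vllm-dev .
```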