@@ -18,6 +18,9 @@ ARG TORCH_BACKEND="cu128"
1818ARG DEEPGEMM_REF="03d0be3"
1919ARG FLASHINF_REF="1d72ed4"
2020
21+ # Make sure to update the dependency version in pyproject.toml when updating this
22+ ARG VLLM_VERSION="0.9.2"
23+
2124# Define general architecture ARGs for supporting both x86 and aarch64 builds.
2225# ARCH: Used for package suffixes (e.g., amd64, arm64)
2326# ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
@@ -39,10 +42,11 @@ ARG ARCH_ALT=x86_64
3942
4043FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
4144
42- # Redeclare ARCH, ARCH_ALT, TORCH_BACKEND so they're available in this stage
45+ # Redeclare ARCH, ARCH_ALT, TORCH_BACKEND, VLLM_VERSION so they're available in this stage
4346ARG ARCH
4447ARG ARCH_ALT
4548ARG TORCH_BACKEND
49+ ARG VLLM_VERSION
4650
4751USER root
4852ARG PYTHON_VERSION=3.12
@@ -134,7 +138,7 @@ RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \
134138 cd ${NIXL_SRC_DIR} && \
135139 git checkout ${NIXL_REF} && \
136140 if [ "$ARCH" = "arm64" ]; then \
137- nixl_build_args="-Ddisable_gds_backend=true -Dgds_path=/usr/local/cuda/targets/sbsa-linux "; \
141+ nixl_build_args="-Ddisable_gds_backend=true"; \
138142 else \
139143 nixl_build_args=""; \
140144 fi && \
@@ -171,8 +175,7 @@ ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
171175# TEMP: disable gds backend for arm64
172176RUN if [ "$ARCH" = "arm64" ]; then \
173177 cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl \
174- --config-settings=setup-args="-Ddisable_gds_backend=true" \
175- --config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
178+ --config-settings=setup-args="-Ddisable_gds_backend=true"; \
176179 else \
177180 cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl; \
178181 fi && \
@@ -190,13 +193,17 @@ ARG MAX_JOBS=16
190193ENV MAX_JOBS=$MAX_JOBS
191194ENV CUDA_HOME=/usr/local/cuda
192195
193- # TODO - split vllm, DeepEP, DeepGeMM, PPLX installs
194- # Should be able to select how you want your build to go
195196RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
196197 --mount=type=cache,target=/root/.cache/uv \
197- cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
198- chmod +x /tmp/install_vllm.sh && \
199- /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND
198+ if [ "$ARCH" = "arm64" ]; then \
199+ # arm64 runs install_vllm.sh (editable, at $VLLM_REF, into /opt); other arches install vllm==$VLLM_VERSION with the same TORCH_BACKEND.
200+ # TODO - split vllm, DeepEP, DeepGeMM, PPLX installs so the build flavor can be selected
201+ cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
202+ chmod +x /tmp/install_vllm.sh && \
203+ /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND; \
204+ else \
205+ uv pip install "vllm==${VLLM_VERSION}" --torch-backend "${TORCH_BACKEND}"; \
206+ fi
200207
201208ENV LD_LIBRARY_PATH=\
202209/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
@@ -348,7 +355,6 @@ COPY LICENSE /workspace/
348355COPY Cargo.toml /workspace/
349356COPY Cargo.lock /workspace/
350357COPY rust-toolchain.toml /workspace/
351- COPY hatch_build.py /workspace/
352358
353359# Copy source code
354360COPY lib/ /workspace/lib/
@@ -392,22 +398,11 @@ COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
392398# Copy rest of the code
393399COPY . /workspace
394400
395- # Build C bindings, creates lib/bindings/c/include
396- #
397- # TODO: In theory the 'cargo build' in earlier stage covers this, we "just" need to copy the
398- # `lib/bindings/c/include` folder that build.rs generated across.
399- # I couldn't get that to work, hence TODO.
400- RUN cd /workspace/lib/bindings/c && cargo build --release --locked
401-
402401# Package the bindings
403402RUN mkdir -p /opt/dynamo/bindings/wheels && \
404403 mkdir /opt/dynamo/bindings/lib && \
405404 cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. && \
406- cp target/release/libdynamo_llm_capi.so /opt/dynamo/bindings/lib/. && \
407- cp -r lib/bindings/c/include /opt/dynamo/bindings/. && \
408- cp target/release/dynamo-run /usr/local/bin && \
409- cp target/release/metrics /usr/local/bin && \
410- cp target/release/mock_worker /usr/local/bin
405+ cp target/release/metrics /usr/local/bin
411406
412407RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \
413408 uv pip install /workspace/dist/ai_dynamo*any.whl
@@ -455,9 +450,6 @@ RUN apt-get update && \
455450 cuda-toolkit-12-8 && \
456451 rm -rf /var/lib/apt/lists/*
457452
458- ### COPY BINDINGS ###
459- # Copy all bindings (wheels, lib, include) from ci_minimum
460- COPY --from=ci_minimum /opt/dynamo/bindings /opt/dynamo/bindings
461453### COPY NATS & ETCD ###
462454# Copy nats and etcd from base image
463455COPY --from=base /usr/bin/nats-server /usr/bin/nats-server
@@ -466,11 +458,16 @@ ENV PATH=/usr/local/bin/etcd/:$PATH
466458
467459# Copy UCX from base image as plugin for NIXL
468460# Copy NIXL source from wheel_builder image
461+ # Copy dynamo wheels for gitlab artifacts
469462COPY --from=base /usr/local/ucx /usr/local/ucx
470463COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX
464+ COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
471465
472466# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries
473- COPY --from=base /opt/vllm /opt/vllm
467+ # COPY is a Dockerfile instruction and cannot be invoked from the shell inside RUN,
468+ # so bind-mount the base stage and copy /opt/vllm only when that stage contains it.
469+ RUN --mount=type=bind,from=base,target=/mnt/base \
470+ if [ -d /mnt/base/opt/vllm ]; then mkdir -p /opt && cp -a /mnt/base/opt/vllm /opt/vllm; fi
474471ENV LD_LIBRARY_PATH=\
475472/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
476473$NIXL_LIB_DIR:\
@@ -479,10 +476,11 @@ $NIXL_PLUGIN_DIR:\
479476/usr/local/ucx/lib/ucx:\
480477$LD_LIBRARY_PATH
481478
482-
483479# Copy entire venv
484- # Theres a lot of stuff we'd have to re-compile
485- # Think its better to just copy
480+ # There's a lot of stuff we'd have to re-compile (for arm64)
481+ # TODO: use pip ai-dynamo[vllm] in venv to replicate end user environment
482+ # Copy metrics binary from the ci_minimum stage; it is not part of the ai-dynamo wheel
483+ COPY --from=ci_minimum /workspace/target/release/metrics /usr/local/bin/metrics
486484COPY --from=ci_minimum ${VIRTUAL_ENV} ${VIRTUAL_ENV}
487485
488486# Once UX refactor is merged
0 commit comments