Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 5 additions & 17 deletions container/Dockerfile.sglang
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,6 @@ ARG ARCH_ALT=x86_64
ARG SGLANG_VERSION="0.4.9.post1"
ARG SGL_KERNEL_VERSION="0.2.4"

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .

##################################
########## Base Image ############
##################################
Expand All @@ -52,6 +38,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4

USER root
ARG PYTHON_VERSION=3.12
Expand Down Expand Up @@ -111,9 +98,10 @@ SHELL ["/bin/bash", "-c"]
WORKDIR /workspace

### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
# Clone the nixl source and check out the ref selected by the NIXL_REF build arg.
# NIXL_REF is quoted on purpose: with an unquoted, empty value `git checkout`
# would run with no argument, exit 0, and silently leave the default branch
# checked out. Quoted, an empty or malformed ref makes the build fail instead.
# `git -C` is used rather than `cd` so the RUN does not rely on shell state.
RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
    git -C /opt/nixl checkout "${NIXL_REF}"
RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && \
mkdir build && \
Expand Down
18 changes: 5 additions & 13 deletions container/Dockerfile.tensorrt_llm
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,6 @@ ARG RUNTIME_IMAGE_TAG="12.9.0-runtime-ubuntu24.04"
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base

WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .

##################################
########## Build Image ###########
##################################
Expand All @@ -53,6 +43,7 @@ FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS build
# Redeclare ARCH and ARCH_ALT so they're available in this build stage
ARG ARCH
ARG ARCH_ALT
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4

USER root

Expand Down Expand Up @@ -101,9 +92,10 @@ ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
SHELL ["/bin/bash", "-c"]

# NIXL SETUP
# Copy nixl source, and use commit hash as cache hint
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
# Clone the nixl source and check out the ref selected by the NIXL_REF build arg.
# NIXL_REF is quoted on purpose: with an unquoted, empty value `git checkout`
# would run with no argument, exit 0, and silently leave the default branch
# checked out. Quoted, an empty or malformed ref makes the build fail instead.
# `git -C` is used rather than `cd` so the RUN does not rely on shell state.
RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
    git -C /opt/nixl checkout "${NIXL_REF}"
RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && \
mkdir build && \
Expand Down
38 changes: 17 additions & 21 deletions container/Dockerfile.vllm
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,6 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG ARCH=amd64
ARG ARCH_ALT=x86_64

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS nixl_base

# Redeclare ARCH and ARCH_ALT so they're available in this stage
ARG ARCH
ARG ARCH_ALT

WORKDIR /opt/nixl
# Add a cache hint that only changes when the nixl commit changes
ARG NIXL_COMMIT
# This line acts as a cache key - it only changes when NIXL_COMMIT changes
RUN echo "NIXL commit: ${NIXL_COMMIT}" > /opt/nixl/commit.txt
# Copy the nixl source
COPY --from=nixl . .

##################################
########## Base Image ############
##################################
Expand Down Expand Up @@ -80,6 +66,7 @@ RUN apt-get update -y && \
libnuma-dev librdmacm-dev ibverbs-providers

ARG NIXL_UCX_REF=v1.19.x
ARG NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4

WORKDIR /workspace

Expand Down Expand Up @@ -118,10 +105,11 @@ SHELL ["/bin/bash", "-c"]
WORKDIR /workspace

### NIXL SETUP ###
# Copy nixl source, and use commit hash as cache hint
# Clone nixl source
# TEMP: disable gds backend for arm64
COPY --from=nixl_base /opt/nixl /opt/nixl
COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
# Fetch nixl and pin it to NIXL_REF. The ref is quoted so that an empty value
# fails the build; unquoted, `git checkout` would receive no argument, succeed,
# and leave the default branch checked out. `git -C` replaces the `cd` step.
RUN git clone "https://github.com/ai-dynamo/nixl.git" /opt/nixl && \
    git -C /opt/nixl checkout "${NIXL_REF}"
RUN if [ "$ARCH" = "arm64" ]; then \
cd /opt/nixl && \
mkdir build && \
Expand Down Expand Up @@ -199,9 +187,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
cd tools/ep_kernels && \
bash install_python_libraries.sh && \
cd ep_kernels_workspace && \
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
git clone https://github.com/deepseek-ai/DeepGEMM.git && \
cd DeepGEMM && \
python setup.py install; \
sed -i 's|git@github.com:|https://github.com/|g' .gitmodules && \
git submodule sync --recursive && \
git submodule update --init --recursive && \
cat install.sh && \
./install.sh; \
else \
uv pip install pip cuda-python && \
mkdir /opt/vllm && \
Expand All @@ -213,9 +205,13 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
cd tools/ep_kernels && \
bash install_python_libraries.sh && \
cd ep_kernels_workspace && \
git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
git clone https://github.com/deepseek-ai/DeepGEMM.git && \
cd DeepGEMM && \
python setup.py install; \
sed -i 's|git@github.com:|https://github.com/|g' .gitmodules && \
git submodule sync --recursive && \
git submodule update --init --recursive && \
cat install.sh && \
./install.sh; \
fi

# Common dependencies
Expand Down
40 changes: 5 additions & 35 deletions container/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,7 @@ NONE_BASE_IMAGE_TAG="24.04"
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"

NIXL_COMMIT=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
NIXL_REPO=ai-dynamo/nixl.git

NIXL_REF=3c47a48955e6f96bd5d4fb43a9d80bb64722f8e4
NIXL_UCX_EFA_REF=7ec95b95e524a87e81cac92f5ca8523e3966b16b

NO_CACHE=""
Expand Down Expand Up @@ -390,7 +388,7 @@ if [[ "$PLATFORM" == *"linux/arm64"* ]]; then
ARCH="arm64"
BUILD_ARGS+=" --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 "
# TEMP: Pin to nixl 0.3.1 for arm build, since 0.4.0 fails
NIXL_COMMIT=3503658e71143b56f9d5b1b440d84a94b9c41af8
NIXL_REF=3503658e71143b56f9d5b1b440d84a94b9c41af8
fi

# Update DOCKERFILE if framework is VLLM
Expand All @@ -404,36 +402,8 @@ elif [[ $FRAMEWORK == "SGLANG" ]]; then
DOCKERFILE=${SOURCE_DIR}/Dockerfile.sglang
fi

NIXL_DIR="/tmp/nixl/nixl_src"

# Clone original NIXL to temp directory
if [ -d "$NIXL_DIR" ]; then
echo "Warning: $NIXL_DIR already exists, skipping clone"
else
if [ -n "${GITHUB_TOKEN}" ]; then
git clone "https://oauth2:${GITHUB_TOKEN}@github.com/${NIXL_REPO}" "$NIXL_DIR"
else
# Try HTTPS first with credential prompting disabled, fall back to SSH if it fails
if ! GIT_TERMINAL_PROMPT=0 git clone https://github.com/${NIXL_REPO} "$NIXL_DIR"; then
echo "HTTPS clone failed, falling back to SSH..."
git clone git@github.com:${NIXL_REPO} "$NIXL_DIR"
fi
fi
fi

pushd "$NIXL_DIR" || exit
if ! git checkout ${NIXL_COMMIT}; then
echo "ERROR: Failed to checkout NIXL commit ${NIXL_COMMIT}. The cached directory may be out of date."
echo "Please delete $NIXL_DIR and re-run the build script."
exit 1
fi

popd

BUILD_CONTEXT_ARG+=" --build-context nixl=$NIXL_DIR"

# Add NIXL_COMMIT as a build argument to enable caching
BUILD_ARGS+=" --build-arg NIXL_COMMIT=${NIXL_COMMIT} "
# Forward NIXL_REF to the image build as a --build-arg; a value passed this way
# overrides the default given on the Dockerfile's `ARG NIXL_REF=...` line.
BUILD_ARGS+=" --build-arg NIXL_REF=${NIXL_REF} "

if [[ $TARGET == "local-dev" ]]; then
BUILD_ARGS+=" --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) "
Expand Down Expand Up @@ -519,7 +489,7 @@ if [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
echo "Checking for TensorRT-LLM wheel in ${TENSORRTLLM_PIP_WHEEL_DIR}"
if ! check_wheel_file "${TENSORRTLLM_PIP_WHEEL_DIR}" "${ARCH}_${TRTLLM_COMMIT}"; then
echo "WARN: Valid trtllm wheel file not found in ${TENSORRTLLM_PIP_WHEEL_DIR}, attempting to build from source"
if ! env -i ${SOURCE_DIR}/build_trtllm_wheel.sh -o ${TENSORRTLLM_PIP_WHEEL_DIR} -c ${TRTLLM_COMMIT} -a ${ARCH} -n ${NIXL_COMMIT}; then
if ! env -i ${SOURCE_DIR}/build_trtllm_wheel.sh -o ${TENSORRTLLM_PIP_WHEEL_DIR} -c ${TRTLLM_COMMIT} -a ${ARCH} -n ${NIXL_REF}; then
error "ERROR: Failed to build TensorRT-LLM wheel"
fi
fi
Expand Down
Loading