feat: Update build scripts, Dockerfile, and docker-compose for NGC build with WHL file creation and installation

- Set CUDA version based on `nvcc` output and determine platform name based on architecture.
- Clean up and prepare the build directory, install build dependencies with caching, and copy source files.
- Build the project using `ninja`, update version information if `LMDEPLOY_VERSION` is set, and build the wheel with the determined platform name.
- Process the built wheel to include CUDA version information and copy it to `/lmdeploy_build` if `WRITE_WHL` is set to `true`.
- Update Dockerfile to set `LMDEPLOY_VERSION` as an argument and environment variable, copy necessary files, build the WHL file, verify it, and create a minimal stage to copy the WHL file.
- Install dependencies, set CUDA architecture list, copy and install the WHL file in the final image, and display detailed information about the `LMDEPLOY_VERSION`.
- Update docker-compose file to define common environment variables and logging options, set `WRITE_WHL` to `true` by default, mount necessary volumes, and specify the entrypoint script for the build service.
zyearw1024 committed Oct 10, 2024
1 parent e31e70d commit 21747da
Showing 4 changed files with 240 additions and 0 deletions.
12 changes: 12 additions & 0 deletions .dockerignore
@@ -0,0 +1,12 @@
/.github/
/.venv
/build
dist
# Ignore deploy_docker
deploy_docker/
docker/
*.so

# LMDeploy
workspace/
work_dir*/
76 changes: 76 additions & 0 deletions builder/manywheel/entrypoint_build_ngc.sh
@@ -0,0 +1,76 @@
#!/usr/bin/env bash
set -eux

# Set CUDA version based on nvcc output
export CUDAVER=$(nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p' | tr -d '.')
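# e.g. nvcc prints "Cuda compilation tools, release 12.3, V12.3.107" -> CUDAVER=123 (version numbers are illustrative)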

# Determine the platform name based on the current architecture
ARCH=$(uname -m)
if [ "$ARCH" == "x86_64" ]; then
export PLAT_NAME="manylinux2014_x86_64"
elif [ "$ARCH" == "aarch64" ]; then
export PLAT_NAME="manylinux2014_aarch64"
else
echo "Unsupported architecture: $ARCH"
exit 1
fi

# # Install necessary packages
# apt update -y

# Clean up and prepare the build directory
rm -rf /tmpbuild
mkdir -p /tmpbuild

# Install build dependencies with caching
mkdir -p /docker_build_cache/.pip
pip3 install --cache-dir /docker_build_cache/.pip ninja cmake wheel

# Ensure the target directory exists before copying
mkdir -p /lmdeploy

# Copy source files to the build directory
cp -r /ant_lmdeploy/* /lmdeploy/

# Build the project
cd /lmdeploy
rm -rf /lmdeploy/lib
mkdir -p build && cd build && rm -rf *

# Use generate.sh to set up the build environment with external cache directory
bash ../generate.sh

ninja -j$(nproc) && ninja install || { echo "Build failed"; exit 1; }

cd ..
rm -rf build

# Update version information if LMDEPLOY_VERSION is set
if [ -n "$LMDEPLOY_VERSION" ]; then
sed -i "s/__version__ = '.*'/__version__ = '$LMDEPLOY_VERSION'/" /lmdeploy/lmdeploy/version.py
fi

# Build the wheel with the determined platform name
python setup.py bdist_wheel --cuda=${CUDAVER} --plat-name $PLAT_NAME -d /tmpbuild/
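# Note: --plat-name is a standard bdist_wheel option; --cuda is assumed to be a project-specific option handled by lmdeploy's setup.py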

# Process the built wheel to include CUDA version information
for whl in /tmpbuild/*.whl; do
    base_name=$(basename "$whl" .whl)

    # Extract version number and add CUDA information
    version=$(echo "$base_name" | sed -n 's/.*-\([0-9.]*\)-cp.*/\1/p')
    new_version="${version}+cu${CUDAVER}"

    # Construct the new file name
    new_base_name=$(echo "$base_name" | sed "s/${version}/${new_version}/")

    mv "$whl" "/tmpbuild/${new_base_name}.whl"

    # If WRITE_WHL is "true", copy the wheel to /lmdeploy_build (mkdir -p is a no-op when the directory exists)
    if [ "${WRITE_WHL:-}" == "true" ]; then
        mkdir -p /lmdeploy_build
        cp "/tmpbuild/${new_base_name}.whl" "/lmdeploy_build/${new_base_name}.whl"
    fi
done
101 changes: 101 additions & 0 deletions docker/Dockerfile_ngc
@@ -0,0 +1,101 @@
ARG LMDEPLOY_VERSION=0.6.1.1

# Stage 1: Build the WHL file
FROM nvcr.io/nvidia/pytorch:24.02-py3 AS builder

# Ubuntu 22.04 including Python 3.10
# NVIDIA CUDA 12.3.2
# NVIDIA cuBLAS 12.3.4.1
# NVIDIA cuDNN 9.0.0.306
# NVIDIA NCCL 2.19.4
# NVIDIA RAPIDS™ 23.12
# rdma-core 39.0
# NVIDIA HPC-X 2.16rc4
# OpenMPI 4.1.4+
# GDRCopy 2.3
# TensorBoard 2.9.0
# Nsight Compute 2023.3.1.1
# Nsight Systems 2023.4.1.97
# NVIDIA TensorRT™ 8.6.3
# Torch-TensorRT 2.2.0a0
# NVIDIA DALI® 1.34
# MAGMA 2.6.2
# JupyterLab 2.3.2 including Jupyter-TensorBoard
# TransformerEngine 1.3
# PyTorch quantization wheel 2.1.2
ARG LMDEPLOY_VERSION
ENV LMDEPLOY_VERSION=${LMDEPLOY_VERSION}
RUN echo "Stage 1 LMDEPLOY_VERSION: ${LMDEPLOY_VERSION}"

# Set environment variables
ENV TZ=Asia/Shanghai
ENV LOG_LEVEL=INFO
ENV CUDA_VISIBLE_DEVICES=0
ENV WRITE_WHL="true"

# Copy necessary files
COPY ./../ /ant_lmdeploy
COPY ./../builder/manywheel/entrypoint_build_ngc.sh /entrypoint_build.sh

# Build the WHL file
RUN bash /entrypoint_build.sh

# List contents of /tmpbuild for debugging
RUN ls -la /tmpbuild/

# Verify the WHL file
RUN WHL_FILE=$(ls /tmpbuild/lmdeploy-${LMDEPLOY_VERSION}*.whl) && \
    echo "Found WHL file: ${WHL_FILE}"

# List contents of /lmdeploy_build for debugging
RUN ls -la /lmdeploy_build/

# Stage 2: Create a minimal stage to copy the WHL file
FROM scratch AS exporter
COPY --from=builder /lmdeploy_build/*.whl .

# Stage 3: Create the final image
FROM nvcr.io/nvidia/pytorch:24.02-py3

ARG LMDEPLOY_VERSION
ENV LMDEPLOY_VERSION=${LMDEPLOY_VERSION}

# Set CUDA architecture list
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}

# Install dependencies
COPY ./../requirements/ngc-build.txt /workspace/requirements-ngc-build.txt
RUN pip3 install -r /workspace/requirements-ngc-build.txt

# Display LMDEPLOY_VERSION and build settings for verification
# (TZ, LOG_LEVEL, CUDA_VISIBLE_DEVICES, and WRITE_WHL are set only in the builder stage, so they may be empty here)
RUN echo "Stage 3 LMDEPLOY_VERSION: ${LMDEPLOY_VERSION}" && \
    echo "Current CUDA architecture list: ${TORCH_CUDA_ARCH_LIST}" && \
    echo "Environment variables set: TZ=${TZ}, LOG_LEVEL=${LOG_LEVEL}, CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, WRITE_WHL=${WRITE_WHL}"

# Copy and install the WHL file
COPY --from=builder /tmpbuild/lmdeploy-${LMDEPLOY_VERSION}*.whl /workspace/

# Ensure the WHL file is correctly specified
RUN WHL_FILE=$(ls /workspace/lmdeploy-${LMDEPLOY_VERSION}*.whl) && \
    echo "Installing WHL file: ${WHL_FILE}" && \
    if [ -n "$WHL_FILE" ]; then pip3 install "$WHL_FILE" --no-deps; else echo "No WHL file found"; exit 1; fi

# # Install triton
# RUN pip3 install triton==2.1.0

WORKDIR /workspace


# Example build commands

# Step 1: If not in the docker directory, navigate to it first
# cd docker

# Step 2: Build and export the WHL file locally
# This step builds the WHL file and exports it to the local ./lmdeploy_build directory
# time DOCKER_BUILDKIT=1 docker build --progress=plain --platform linux/amd64 --build-arg LMDEPLOY_VERSION=0.6.1.3 --target exporter --output type=local,dest=./lmdeploy_build -f Dockerfile_ngc ..

# Step 3: Build the final image
# This step builds the complete image, including installing the WHL file
# time DOCKER_BUILDKIT=1 docker build --progress=plain --platform linux/amd64 -t ant_lmdeploy:v0.6.1.3_cu123_$(date +"%Y%m%d") --build-arg LMDEPLOY_VERSION=0.6.1.3 -f Dockerfile_ngc ..
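
# Step 4 (optional): Install the exported WHL file on the host
# The exact file name depends on the CUDA and Python versions; the name below is hypothetical
# pip3 install ./lmdeploy_build/lmdeploy-0.6.1.3+cu123-cp310-cp310-manylinux2014_x86_64.whl --no-deps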
51 changes: 51 additions & 0 deletions docker/docker-compose-ngc-build-amd64-dist.yml
@@ -0,0 +1,51 @@
version: "3.9"
# FROM nvcr.io/nvidia/pytorch:24.02-py3

# Ubuntu 22.04 including Python 3.10
# NVIDIA CUDA 12.3.2
# NVIDIA cuBLAS 12.3.4.1
# NVIDIA cuDNN 9.0.0.306
# NVIDIA NCCL 2.19.4
# NVIDIA RAPIDS™ 23.12
# rdma-core 39.0
# NVIDIA HPC-X 2.16rc4
# OpenMPI 4.1.4+
# GDRCopy 2.3
# TensorBoard 2.9.0
# Nsight Compute 2023.3.1.1
# Nsight Systems 2023.4.1.97
# NVIDIA TensorRT™ 8.6.3
# Torch-TensorRT 2.2.0a0
# NVIDIA DALI® 1.34
# MAGMA 2.6.2
# JupyterLab 2.3.2 including Jupyter-TensorBoard
# TransformerEngine 1.3
# PyTorch quantization wheel 2.1.2
x-node-common:
  &node-common
  platform: linux/amd64
  environment:
    &node-common-env
    TZ: Asia/Shanghai
    LOG_LEVEL: INFO
    CUDA_VISIBLE_DEVICES: 0
    LMDEPLOY_VERSION: ${LMDEPLOY_VERSION:-0.6.1.1}
    WRITE_WHL: "true" # added environment variable; defaults to "true"
  image: nvcr.io/nvidia/pytorch:24.02-py3
  logging:
    driver: json-file
    options:
      max-size: "100m"
      max-file: "10"

services:
  build-lmdeploy-whl-amd64-01:
    <<: *node-common
    container_name: build-lmdeploy-whl-amd64-01
    volumes:
      - ./../:/ant_lmdeploy
      - ./lmdeploy_build:/lmdeploy_build
      - ./docker_build_cache:/docker_build_cache
      - ./../builder/manywheel/entrypoint_build_ngc.sh:/entrypoint_build.sh
    entrypoint: bash /entrypoint_build.sh
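
# Example usage (a sketch; run from the docker directory, assuming Docker Compose v2):
#   LMDEPLOY_VERSION=0.6.1.3 docker compose -f docker-compose-ngc-build-amd64-dist.yml up
# When WRITE_WHL is "true", the built wheel is written to ./lmdeploy_build on the host.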
