feat: Update build scripts, Dockerfile, and docker-compose for NGC build with WHL file creation and installation

- Set CUDA version based on `nvcc` output and determine platform name based on architecture.
- Clean up and prepare the build directory, install build dependencies with caching, and copy source files.
- Build the project using `ninja`, update version information if `LMDEPLOY_VERSION` is set, and build the wheel with the determined platform name.
- Process the built wheel to include CUDA version information and copy it to `/lmdeploy_build` if `WRITE_WHL` is set to `true`.
- Update Dockerfile to set `LMDEPLOY_VERSION` as an argument and environment variable, copy necessary files, build the WHL file, verify it, and create a minimal stage to copy the WHL file.
- Install dependencies, set CUDA architecture list, copy and install the WHL file in the final image, and display detailed information about the `LMDEPLOY_VERSION`.
- Update docker-compose file to define common environment variables and logging options, set `WRITE_WHL` to `true` by default, mount necessary volumes, and specify the entrypoint script for the build service.
zyearw1024 committed Oct 10, 2024
1 parent e31e70d commit 21747da
Showing 4 changed files with 240 additions and 0 deletions.
12 changes: 12 additions & 0 deletions .dockerignore
@@ -0,0 +1,12 @@
/.github/
/.venv
/build
dist
# Ignore deploy_docker
deploy_docker/
docker/
*.so

# LMDeploy
workspace/
work_dir*/
76 changes: 76 additions & 0 deletions builder/manywheel/entrypoint_build_ngc.sh
@@ -0,0 +1,76 @@
#!/usr/bin/env bash
set -eux

# Set CUDA version based on nvcc output
export CUDAVER=$(nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p' | tr -d '.')
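# e.g. nvcc prints "Cuda compilation tools, release 12.3, V12.3.107" -> CUDAVER=123 (version numbers are illustrative)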

# Determine the platform name based on the current architecture
ARCH=$(uname -m)
if [ "$ARCH" == "x86_64" ]; then
export PLAT_NAME="manylinux2014_x86_64"
elif [ "$ARCH" == "aarch64" ]; then
export PLAT_NAME="manylinux2014_aarch64"
else
echo "Unsupported architecture: $ARCH"
exit 1
fi

# # Install necessary packages
# apt update -y

# Clean up and prepare the build directory
rm -rf /tmpbuild
mkdir -p /tmpbuild

# Install build dependencies with caching
mkdir -p /docker_build_cache/.pip
pip3 install --cache-dir /docker_build_cache/.pip ninja cmake wheel

# Ensure the target directory exists before copying
mkdir -p /lmdeploy

# Copy source files to the build directory
cp -r /ant_lmdeploy/* /lmdeploy/

# Build the project
cd /lmdeploy
rm -rf /lmdeploy/lib
mkdir -p build && cd build && rm -rf *

# Use generate.sh to set up the build environment with external cache directory
bash ../generate.sh

ninja -j$(nproc) && ninja install || { echo "Build failed"; exit 1; }

cd ..
rm -rf build

# Update version information if LMDEPLOY_VERSION is set
if [ -n "$LMDEPLOY_VERSION" ]; then
sed -i "s/__version__ = '.*'/__version__ = '$LMDEPLOY_VERSION'/" /lmdeploy/lmdeploy/version.py
fi

# Build the wheel with the determined platform name
python setup.py bdist_wheel --cuda=${CUDAVER} --plat-name $PLAT_NAME -d /tmpbuild/
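# Note: --plat-name is a standard bdist_wheel option; --cuda is assumed to be a project-specific option handled by lmdeploy's setup.py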

# Process the built wheel to include CUDA version information
for whl in /tmpbuild/*.whl; do
    base_name=$(basename "$whl" .whl)

    # Extract version number and add CUDA information
    version=$(echo "$base_name" | sed -n 's/.*-\([0-9.]*\)-cp.*/\1/p')
    new_version="${version}+cu${CUDAVER}"

    # Construct the new file name
    new_base_name=$(echo "$base_name" | sed "s/${version}/${new_version}/")

    mv "$whl" "/tmpbuild/${new_base_name}.whl"

    # If WRITE_WHL is "true", copy the wheel to /lmdeploy_build (mkdir -p is a no-op when the directory exists)
    if [ "${WRITE_WHL:-}" == "true" ]; then
        mkdir -p /lmdeploy_build
        cp "/tmpbuild/${new_base_name}.whl" "/lmdeploy_build/${new_base_name}.whl"
    fi
done
101 changes: 101 additions & 0 deletions docker/Dockerfile_ngc
@@ -0,0 +1,101 @@
ARG LMDEPLOY_VERSION=0.6.1.1

# Stage 1: Build the WHL file
FROM nvcr.io/nvidia/pytorch:24.02-py3 AS builder

# Ubuntu 22.04 including Python 3.10
# NVIDIA CUDA 12.3.2
# NVIDIA cuBLAS 12.3.4.1
# NVIDIA cuDNN 9.0.0.306
# NVIDIA NCCL 2.19.4
# NVIDIA RAPIDS™ 23.12
# rdma-core 39.0
# NVIDIA HPC-X 2.16rc4
# OpenMPI 4.1.4+
# GDRCopy 2.3
# TensorBoard 2.9.0
# Nsight Compute 2023.3.1.1
# Nsight Systems 2023.4.1.97
# NVIDIA TensorRT™ 8.6.3
# Torch-TensorRT 2.2.0a0
# NVIDIA DALI® 1.34
# MAGMA 2.6.2
# JupyterLab 2.3.2 including Jupyter-TensorBoard
# TransformerEngine 1.3
# PyTorch quantization wheel 2.1.2
ARG LMDEPLOY_VERSION
ENV LMDEPLOY_VERSION=${LMDEPLOY_VERSION}
RUN echo "Stage 1 LMDEPLOY_VERSION: ${LMDEPLOY_VERSION}"

# Set environment variables
ENV TZ=Asia/Shanghai
ENV LOG_LEVEL=INFO
ENV CUDA_VISIBLE_DEVICES=0
ENV WRITE_WHL="true"

# Copy necessary files
COPY ./../ /ant_lmdeploy
COPY ./../builder/manywheel/entrypoint_build_ngc.sh /entrypoint_build.sh

# Build the WHL file
RUN bash /entrypoint_build.sh

# List contents of /tmpbuild for debugging
RUN ls -la /tmpbuild/

# Verify the WHL file
RUN WHL_FILE=$(ls /tmpbuild/lmdeploy-${LMDEPLOY_VERSION}*.whl) && \
    echo "Found WHL file: ${WHL_FILE}"

# List contents of /lmdeploy_build for debugging
RUN ls -la /lmdeploy_build/

# Stage 2: Create a minimal stage to copy the WHL file
FROM scratch AS exporter
COPY --from=builder /lmdeploy_build/*.whl .

# Stage 3: Create the final image
FROM nvcr.io/nvidia/pytorch:24.02-py3

ARG LMDEPLOY_VERSION
ENV LMDEPLOY_VERSION=${LMDEPLOY_VERSION}

# Set CUDA architecture list
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}

# Install dependencies
COPY ./../requirements/ngc-build.txt /workspace/requirements-ngc-build.txt
RUN pip3 install -r /workspace/requirements-ngc-build.txt

# Display LMDEPLOY_VERSION and build settings for verification
# (TZ, LOG_LEVEL, CUDA_VISIBLE_DEVICES, and WRITE_WHL are set only in the builder stage, so they may be empty here)
RUN echo "Stage 3 LMDEPLOY_VERSION: ${LMDEPLOY_VERSION}" && \
    echo "Current CUDA architecture list: ${TORCH_CUDA_ARCH_LIST}" && \
    echo "Environment variables set: TZ=${TZ}, LOG_LEVEL=${LOG_LEVEL}, CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, WRITE_WHL=${WRITE_WHL}"

# Copy and install the WHL file
COPY --from=builder /tmpbuild/lmdeploy-${LMDEPLOY_VERSION}*.whl /workspace/

# Ensure the WHL file is correctly specified
RUN WHL_FILE=$(ls /workspace/lmdeploy-${LMDEPLOY_VERSION}*.whl) && \
    echo "Installing WHL file: ${WHL_FILE}" && \
    if [ -n "$WHL_FILE" ]; then pip3 install "$WHL_FILE" --no-deps; else echo "No WHL file found"; exit 1; fi

# # Install triton
# RUN pip3 install triton==2.1.0

WORKDIR /workspace


# Example build commands

# Step 1: If not in the docker directory, navigate to it first
# cd docker

# Step 2: Build and export the WHL file locally
# This step builds the WHL file and exports it to the local ./lmdeploy_build directory
# time DOCKER_BUILDKIT=1 docker build --progress=plain --platform linux/amd64 --build-arg LMDEPLOY_VERSION=0.6.1.3 --target exporter --output type=local,dest=./lmdeploy_build -f Dockerfile_ngc ..

# Step 3: Build the final image
# This step builds the complete image, including installing the WHL file
# time DOCKER_BUILDKIT=1 docker build --progress=plain --platform linux/amd64 -t ant_lmdeploy:v0.6.1.3_cu123_$(date +"%Y%m%d") --build-arg LMDEPLOY_VERSION=0.6.1.3 -f Dockerfile_ngc ..
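
# Step 4 (optional): Install the exported WHL file on the host
# The exact file name depends on the CUDA and Python versions; the name below is hypothetical
# pip3 install ./lmdeploy_build/lmdeploy-0.6.1.3+cu123-cp310-cp310-manylinux2014_x86_64.whl --no-deps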
51 changes: 51 additions & 0 deletions docker/docker-compose-ngc-build-amd64-dist.yml
@@ -0,0 +1,51 @@
version: "3.9"
# FROM nvcr.io/nvidia/pytorch:24.02-py3

# Ubuntu 22.04 including Python 3.10
# NVIDIA CUDA 12.3.2
# NVIDIA cuBLAS 12.3.4.1
# NVIDIA cuDNN 9.0.0.306
# NVIDIA NCCL 2.19.4
# NVIDIA RAPIDS™ 23.12
# rdma-core 39.0
# NVIDIA HPC-X 2.16rc4
# OpenMPI 4.1.4+
# GDRCopy 2.3
# TensorBoard 2.9.0
# Nsight Compute 2023.3.1.1
# Nsight Systems 2023.4.1.97
# NVIDIA TensorRT™ 8.6.3
# Torch-TensorRT 2.2.0a0
# NVIDIA DALI® 1.34
# MAGMA 2.6.2
# JupyterLab 2.3.2 including Jupyter-TensorBoard
# TransformerEngine 1.3
# PyTorch quantization wheel 2.1.2
x-node-common:
  &node-common
  platform: linux/amd64
  environment:
    &node-common-env
    TZ: Asia/Shanghai
    LOG_LEVEL: INFO
    CUDA_VISIBLE_DEVICES: 0
    LMDEPLOY_VERSION: ${LMDEPLOY_VERSION:-0.6.1.1}
    WRITE_WHL: "true" # added environment variable; defaults to "true"
  image: nvcr.io/nvidia/pytorch:24.02-py3
  logging:
    driver: json-file
    options:
      max-size: "100m"
      max-file: "10"

services:
  build-lmdeploy-whl-amd64-01:
    <<: *node-common
    container_name: build-lmdeploy-whl-amd64-01
    volumes:
      - ./../:/ant_lmdeploy
      - ./lmdeploy_build:/lmdeploy_build
      - ./docker_build_cache:/docker_build_cache
      - ./../builder/manywheel/entrypoint_build_ngc.sh:/entrypoint_build.sh
    entrypoint: bash /entrypoint_build.sh
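
# Example usage (a sketch; run from the docker directory, assuming Docker Compose v2):
#   LMDEPLOY_VERSION=0.6.1.3 docker compose -f docker-compose-ngc-build-amd64-dist.yml up
# When WRITE_WHL is "true", the built wheel is written to ./lmdeploy_build on the host.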
