forked from InternLM/lmdeploy
feat: Update build scripts, Dockerfile, and docker-compose for NGC build with WHL file creation and installation

- Set CUDA version based on `nvcc` output and determine the platform name based on the architecture.
- Clean up and prepare the build directory, install build dependencies with caching, and copy source files.
- Build the project using `ninja`, update version information if `LMDEPLOY_VERSION` is set, and build the wheel with the determined platform name.
- Process the built wheel to include CUDA version information and copy it to `/lmdeploy_build` if `WRITE_WHL` is set to `true`.
- Update the Dockerfile to set `LMDEPLOY_VERSION` as an argument and environment variable, copy the necessary files, build the WHL file, verify it, and create a minimal stage that exports the WHL file.
- Install dependencies, set the CUDA architecture list, copy and install the WHL file in the final image, and print the `LMDEPLOY_VERSION` for verification.
- Update the docker-compose file to define common environment variables and logging options, set `WRITE_WHL` to `true` by default, mount the necessary volumes, and specify the entrypoint script for the build service.
Parent: e31e70d. Commit: 21747da. Showing 4 changed files with 240 additions and 0 deletions.
@@ -0,0 +1,12 @@
/.github/
/.venv
/build
dist
# Ignore deploy_docker
deploy_docker/
docker/
*.so

# LMDeploy
workspace/
work_dir*/
builder/manywheel/entrypoint_build_ngc.sh
@@ -0,0 +1,76 @@
#!/usr/bin/env bash
set -eux

# Set CUDA version based on nvcc output
export CUDAVER=$(nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p' | tr -d '.')
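# Example: an nvcc version line such as "Cuda compilation tools, release 12.3, V12.3.107"
# yields CUDAVER=123 (the sed captures "12.3" and tr strips the dot).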

# Determine the platform name based on the current architecture
# (POSIX `=` instead of `==` so the script also works when invoked via `sh`)
ARCH=$(uname -m)
if [ "$ARCH" = "x86_64" ]; then
    export PLAT_NAME="manylinux2014_x86_64"
elif [ "$ARCH" = "aarch64" ]; then
    export PLAT_NAME="manylinux2014_aarch64"
else
    echo "Unsupported architecture: $ARCH"
    exit 1
fi

# # Install necessary packages
# apt update -y

# Clean up and prepare the build directory
rm -rf /tmpbuild
mkdir -p /tmpbuild

# Install build dependencies with caching
mkdir -p /docker_build_cache/.pip
pip3 install --cache-dir /docker_build_cache/.pip ninja cmake wheel
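# /docker_build_cache is expected to be bind-mounted from the host (see the docker-compose
# file below), so the pip download cache persists across builds.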

# Ensure the target directory exists before copying
mkdir -p /lmdeploy

# Copy source files to the build directory
cp -r /ant_lmdeploy/* /lmdeploy/

# Build the project
cd /lmdeploy
rm -rf /lmdeploy/lib
mkdir -p build && cd build && rm -rf *

# Use generate.sh to set up the build environment with external cache directory
bash ../generate.sh
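# generate.sh (from the lmdeploy source tree) is assumed here to invoke cmake with the
# Ninja generator and the project's standard build options.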

ninja -j$(nproc) && ninja install || { echo "Build failed"; exit 1; }

cd ..
rm -rf build

# Update version information if LMDEPLOY_VERSION is set
# (default expansion so `set -u` does not abort when it is unset)
if [ -n "${LMDEPLOY_VERSION:-}" ]; then
    sed -i "s/__version__ = '.*'/__version__ = '$LMDEPLOY_VERSION'/" /lmdeploy/lmdeploy/version.py
fi

# Build the wheel with the determined platform name
python setup.py bdist_wheel --cuda=${CUDAVER} --plat-name $PLAT_NAME -d /tmpbuild/
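# The wheel lands in /tmpbuild with a name like (hypothetical example)
# lmdeploy-0.6.1.1-cp310-cp310-manylinux2014_x86_64.whl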

# Process the built wheel to include CUDA version information
for whl in /tmpbuild/*.whl; do
    base_name=$(basename "$whl" .whl)

    # Extract version number and add CUDA information
    version=$(echo "$base_name" | sed -n 's/.*-\([0-9.]*\)-cp.*/\1/p')
    new_version="${version}+cu${CUDAVER}"

    # Construct the new file name
    new_base_name=$(echo "$base_name" | sed "s/${version}/${new_version}/")
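    # e.g. (hypothetical) lmdeploy-0.6.1.1-cp310-... becomes lmdeploy-0.6.1.1+cu123-cp310-...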

    mv "$whl" "/tmpbuild/${new_base_name}.whl"

    # Check WRITE_WHL environment variable to determine if the wheel should be copied to /lmdeploy_build
    if [ "${WRITE_WHL:-}" = "true" ]; then
        if [ ! -d "/lmdeploy_build" ]; then
            mkdir -p /lmdeploy_build
        fi
        cp "/tmpbuild/${new_base_name}.whl" "/lmdeploy_build/${new_base_name}.whl"
    fi
done
docker/Dockerfile_ngc
@@ -0,0 +1,101 @@
ARG LMDEPLOY_VERSION=0.6.1.1

# Stage 1: Build the WHL file
FROM nvcr.io/nvidia/pytorch:24.02-py3 AS builder

# Ubuntu 22.04 including Python 3.10
# NVIDIA CUDA 12.3.2
# NVIDIA cuBLAS 12.3.4.1
# NVIDIA cuDNN 9.0.0.306
# NVIDIA NCCL 2.19.4
# NVIDIA RAPIDS™ 23.12
# rdma-core 39.0
# NVIDIA HPC-X 2.16rc4
# OpenMPI 4.1.4+
# GDRCopy 2.3
# TensorBoard 2.9.0
# Nsight Compute 2023.3.1.1
# Nsight Systems 2023.4.1.97
# NVIDIA TensorRT™ 8.6.3
# Torch-TensorRT 2.2.0a0
# NVIDIA DALI® 1.34
# MAGMA 2.6.2
# JupyterLab 2.3.2 including Jupyter-TensorBoard
# TransformerEngine 1.3
# PyTorch quantization wheel 2.1.2
ARG LMDEPLOY_VERSION
ENV LMDEPLOY_VERSION=${LMDEPLOY_VERSION}
RUN echo "Stage 1 LMDEPLOY_VERSION: ${LMDEPLOY_VERSION}"

# Set environment variables
ENV TZ=Asia/Shanghai
ENV LOG_LEVEL=INFO
ENV CUDA_VISIBLE_DEVICES=0
ENV WRITE_WHL="true"

# Copy necessary files
COPY ./../ /ant_lmdeploy
COPY ./../builder/manywheel/entrypoint_build_ngc.sh /entrypoint_build.sh

# Build the WHL file
RUN sh /entrypoint_build.sh

# List contents of /tmpbuild for debugging
RUN ls -la /tmpbuild/

# Verify the WHL file
RUN WHL_FILE=$(ls /tmpbuild/lmdeploy-${LMDEPLOY_VERSION}*.whl) && \
    echo "Found WHL file: ${WHL_FILE}"

# List contents of /lmdeploy_build for debugging
RUN ls -la /lmdeploy_build/

# Stage 2: Create a minimal stage to copy the WHL file
FROM scratch AS exporter
COPY --from=builder /lmdeploy_build/*.whl .
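# With BuildKit, building `--target exporter --output type=local,dest=DIR` writes the wheel(s)
# from this stage straight to DIR on the host (see the example commands at the end of this file).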

# Stage 3: Create the final image
FROM nvcr.io/nvidia/pytorch:24.02-py3

ARG LMDEPLOY_VERSION
ENV LMDEPLOY_VERSION=${LMDEPLOY_VERSION}

# Set CUDA architecture list
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}

# Install dependencies
COPY ./../requirements/ngc-build.txt /workspace/requirements-ngc-build.txt
RUN pip3 install -r /workspace/requirements-ngc-build.txt

# Display detailed information about the LMDEPLOY_VERSION for verification
RUN echo "Stage 3 LMDEPLOY_VERSION: ${LMDEPLOY_VERSION}" && \
    echo "Current CUDA architecture list: ${TORCH_CUDA_ARCH_LIST}" && \
    echo "Environment variables set: TZ=${TZ}, LOG_LEVEL=${LOG_LEVEL}, CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, WRITE_WHL=${WRITE_WHL}"

# Copy and install the WHL file
COPY --from=builder /tmpbuild/lmdeploy-${LMDEPLOY_VERSION}*.whl /workspace/

# Ensure the WHL file is correctly specified
RUN WHL_FILE=$(ls /workspace/lmdeploy-${LMDEPLOY_VERSION}*.whl) && \
    echo "Installing WHL file: ${WHL_FILE}" && \
    if [ -n "$WHL_FILE" ]; then pip3 install "$WHL_FILE" --no-deps; else echo "No WHL file found"; exit 1; fi
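# --no-deps is used here on the assumption that the NGC base image already provides torch and
# that requirements-ngc-build.txt (installed above) covers the remaining runtime dependencies.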

# # Install triton
# RUN pip3 install triton==2.1.0

WORKDIR /workspace


# Example build commands

# Step 1: If not in the docker directory, navigate to it first
# cd docker

# Step 2: Build and export the WHL file locally
# This step builds the WHL file and exports it to the local ./lmdeploy_build directory
# time DOCKER_BUILDKIT=1 docker build --progress=plain --platform linux/amd64 --build-arg LMDEPLOY_VERSION=0.6.1.3 --target exporter --output type=local,dest=./lmdeploy_build -f Dockerfile_ngc ..

# Step 3: Build the final image
# This step builds the complete image, including installing the WHL file
# time DOCKER_BUILDKIT=1 docker build --progress=plain --platform linux/amd64 -t ant_lmdeploy:v0.6.1.3_cu123_$(date +"%Y%m%d") --build-arg LMDEPLOY_VERSION=0.6.1.3 -f Dockerfile_ngc ..
@@ -0,0 +1,51 @@
version: "3.9" | ||
# FROM nvcr.io/nvidia/pytorch:24.02-py3 | ||
|
||
# Ubuntu 22.04 including Python 3.10 | ||
# NVIDIA CUDA 12.3.2 | ||
# NVIDIA cuBLAS 12.3.4.1 | ||
# NVIDIA cuDNN 9.0.0.306 | ||
# NVIDIA NCCL 2.19.4 | ||
# NVIDIA RAPIDS™ 23.12 | ||
# rdma-core 39.0 | ||
# NVIDIA HPC-X 2.16rc4 | ||
# OpenMPI 4.1.4+ | ||
# GDRCopy 2.3 | ||
# TensorBoard 2.9.0 | ||
# Nsight Compute 2023.3.1.1 | ||
# Nsight Systems 2023.4.1.97 | ||
# NVIDIA TensorRT™ 8.6.3 | ||
# Torch-TensorRT 2.2.0a0 | ||
# NVIDIA DALI® 1.34 | ||
# MAGMA 2.6.2 | ||
# JupyterLab 2.3.2 including Jupyter-TensorBoard | ||
# TransformerEngine 1.3 | ||
# PyTorch quantization wheel 2.1.2 | ||
x-node-common: | ||
&node-common | ||
platform: linux/amd64 | ||
environment: | ||
&node-common-env | ||
TZ: Asia/Shanghai | ||
LOG_LEVEL: INFO | ||
CUDA_VISIBLE_DEVICES: 0 | ||
LMDEPLOY_VERSION: ${LMDEPLOY_VERSION:-0.6.1.1} | ||
WRITE_WHL: "true" # 添加环境变量,默认值为 "true" | ||
image: nvcr.io/nvidia/pytorch:24.02-py3 | ||
logging: | ||
driver: json-file | ||
options: | ||
max-size: "100m" | ||
max-file: "10" | ||

services:
  build-lmdeploy-whl-amd64-01:
    <<: *node-common
    container_name: build-lmdeploy-whl-amd64-01
    volumes:
      - ./../:/ant_lmdeploy
      - ./lmdeploy_build:/lmdeploy_build
      - ./docker_build_cache:/docker_build_cache
      - ./../builder/manywheel/entrypoint_build_ngc.sh:/entrypoint_build.sh
    entrypoint: sh /entrypoint_build.sh
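# Example usage (assumed compose file name), run from the docker/ directory:
#   LMDEPLOY_VERSION=0.6.1.3 docker compose -f <this-compose-file>.yml up build-lmdeploy-whl-amd64-01
# The built wheel then appears in ./lmdeploy_build on the host via the bind mount above.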