Merge pull request #5 from zyearw1024/adjust/add_monkey_patch_20241009
Adjust/add monkey patch 20241009
zyearw1024 authored Oct 10, 2024
2 parents d8432ae + 21747da commit 58f1ae6
Showing 7 changed files with 339 additions and 0 deletions.
12 changes: 12 additions & 0 deletions .dockerignore
@@ -0,0 +1,12 @@
/.github/
/.venv
/build
dist
# Ignore deploy_docker
deploy_docker/
docker/
*.so

# LMDeploy
workspace/
work_dir*/
76 changes: 76 additions & 0 deletions builder/manywheel/entrypoint_build_ngc.sh
@@ -0,0 +1,76 @@
#!/usr/bin/env bash
set -eux

# Set CUDA version based on nvcc output
export CUDAVER=$(nvcc --version | sed -n 's/^.*release \([0-9]\+\.[0-9]\+\).*$/\1/p' | tr -d '.')

# Determine the platform name based on the current architecture
ARCH=$(uname -m)
if [ "$ARCH" == "x86_64" ]; then
export PLAT_NAME="manylinux2014_x86_64"
elif [ "$ARCH" == "aarch64" ]; then
export PLAT_NAME="manylinux2014_aarch64"
else
echo "Unsupported architecture: $ARCH"
exit 1
fi

# # Install necessary packages
# apt update -y

# Clean up and prepare the build directory
rm -rf /tmpbuild
mkdir -p /tmpbuild

# Install build dependencies with caching
mkdir -p /docker_build_cache/.pip
pip3 install --cache-dir /docker_build_cache/.pip ninja cmake wheel

# Ensure the target directory exists before copying
mkdir -p /lmdeploy

# Copy source files to the build directory
cp -r /ant_lmdeploy/* /lmdeploy/

# Build the project
cd /lmdeploy
rm -rf /lmdeploy/lib
mkdir -p build && cd build && rm -rf *

# Use generate.sh to set up the build environment with external cache directory
bash ../generate.sh

ninja -j$(nproc) && ninja install || { echo "Build failed"; exit 1; }

cd ..
rm -rf build

# Update version information if LMDEPLOY_VERSION is set
if [ -n "$LMDEPLOY_VERSION" ]; then
sed -i "s/__version__ = '.*'/__version__ = '$LMDEPLOY_VERSION'/" /lmdeploy/lmdeploy/version.py
fi

# Build the wheel with the determined platform name
python setup.py bdist_wheel --cuda=${CUDAVER} --plat-name $PLAT_NAME -d /tmpbuild/

# Process the built wheel to include CUDA version information
for whl in /tmpbuild/*.whl; do
    base_name=$(basename "$whl" .whl)

    # Extract version number and add CUDA information
    version=$(echo "$base_name" | sed -n 's/.*-\([0-9.]*\)-cp.*/\1/p')
    new_version="${version}+cu${CUDAVER}"

    # Construct the new file name
    new_base_name=$(echo "$base_name" | sed "s/${version}/${new_version}/")

    mv "$whl" "/tmpbuild/${new_base_name}.whl"

    # Check WRITE_WHL environment variable to determine if the wheel should be copied to /lmdeploy_build
    if [ "$WRITE_WHL" == "true" ]; then
        if [ ! -d "/lmdeploy_build" ]; then
            mkdir -p /lmdeploy_build
        fi
        cp "/tmpbuild/${new_base_name}.whl" "/lmdeploy_build/${new_base_name}.whl"
    fi
done
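
# Note: the loop above appends a local version tag "+cu<CUDAVER>" to the package
# version embedded in the wheel filename. Below is a minimal Python sketch of the
# same renaming scheme, for illustration only (it is not part of the build script,
# and the example filename is hypothetical):

import re

def rename_wheel(filename: str, cuda_ver: str) -> str:
    # Extract the package version between the distribution name and the "cpXY" tag,
    # then append the "+cu<ver>" local version tag, mirroring the sed commands above.
    version = re.search(r"-([0-9.]+)-cp", filename).group(1)
    return filename.replace(version, f"{version}+cu{cuda_ver}", 1)

print(rename_wheel("lmdeploy-0.6.1.1-cp310-cp310-manylinux2014_x86_64.whl", "123"))
# -> lmdeploy-0.6.1.1+cu123-cp310-cp310-manylinux2014_x86_64.whl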
101 changes: 101 additions & 0 deletions docker/Dockerfile_ngc
@@ -0,0 +1,101 @@
ARG LMDEPLOY_VERSION=0.6.1.1

# Stage 1: Build the WHL file
FROM nvcr.io/nvidia/pytorch:24.02-py3 AS builder

# Ubuntu 22.04 including Python 3.10
# NVIDIA CUDA 12.3.2
# NVIDIA cuBLAS 12.3.4.1
# NVIDIA cuDNN 9.0.0.306
# NVIDIA NCCL 2.19.4
# NVIDIA RAPIDS™ 23.12
# rdma-core 39.0
# NVIDIA HPC-X 2.16rc4
# OpenMPI 4.1.4+
# GDRCopy 2.3
# TensorBoard 2.9.0
# Nsight Compute 2023.3.1.1
# Nsight Systems 2023.4.1.97
# NVIDIA TensorRT™ 8.6.3
# Torch-TensorRT 2.2.0a0
# NVIDIA DALI® 1.34
# MAGMA 2.6.2
# JupyterLab 2.3.2 including Jupyter-TensorBoard
# TransformerEngine 1.3
# PyTorch quantization wheel 2.1.2
ARG LMDEPLOY_VERSION
ENV LMDEPLOY_VERSION=${LMDEPLOY_VERSION}
RUN echo "Stage 1 LMDEPLOY_VERSION: ${LMDEPLOY_VERSION}"

# Set environment variables
ENV TZ=Asia/Shanghai
ENV LOG_LEVEL=INFO
ENV CUDA_VISIBLE_DEVICES=0
ENV WRITE_WHL="true"

# Copy necessary files
COPY ./../ /ant_lmdeploy
COPY ./../builder/manywheel/entrypoint_build_ngc.sh /entrypoint_build.sh

# Build the WHL file
RUN sh /entrypoint_build.sh

# List contents of /tmpbuild for debugging
RUN ls -la /tmpbuild/

# Verify the WHL file
RUN WHL_FILE=$(ls /tmpbuild/lmdeploy-${LMDEPLOY_VERSION}*.whl) && \
echo "Found WHL file: ${WHL_FILE}"

# List contents of /lmdeploy_build for debugging
RUN ls -la /lmdeploy_build/

# Stage 2: Create a minimal stage to copy the WHL file
FROM scratch AS exporter
COPY --from=builder /lmdeploy_build/*.whl .

# Stage 3: Create the final image
FROM nvcr.io/nvidia/pytorch:24.02-py3

ARG LMDEPLOY_VERSION
ENV LMDEPLOY_VERSION=${LMDEPLOY_VERSION}

# Set CUDA architecture list
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}

# Install dependencies
COPY ./../requirements/ngc-build.txt /workspace/requirements-ngc-build.txt
RUN pip3 install -r /workspace/requirements-ngc-build.txt

# Display detailed information about the LMDEPLOY_VERSION for verification
RUN echo "Stage 3 LMDEPLOY_VERSION: ${LMDEPLOY_VERSION}" && \
echo "Current CUDA architecture list: ${TORCH_CUDA_ARCH_LIST}" && \
echo "Environment variables set: TZ=${TZ}, LOG_LEVEL=${LOG_LEVEL}, CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}, WRITE_WHL=${WRITE_WHL}"

# Copy and install the WHL file
COPY --from=builder /tmpbuild/lmdeploy-${LMDEPLOY_VERSION}*.whl /workspace/

# Ensure the WHL file is correctly specified
RUN WHL_FILE=$(ls /workspace/lmdeploy-${LMDEPLOY_VERSION}*.whl) && \
echo "Installing WHL file: ${WHL_FILE}" && \
if [ -n "$WHL_FILE" ]; then pip3 install "$WHL_FILE" --no-deps; else echo "No WHL file found"; exit 1; fi

# # Install triton
# RUN pip3 install triton==2.1.0

WORKDIR /workspace


# Example build commands

# Step 1: If not in the docker directory, navigate to it first
# cd docker

# Step 2: Build and export the WHL file locally
# This step builds the WHL file and exports it to the local ./lmdeploy_build directory
# time DOCKER_BUILDKIT=1 docker build --progress=plain --platform linux/amd64 --build-arg LMDEPLOY_VERSION=0.6.1.3 --target exporter --output type=local,dest=./lmdeploy_build -f Dockerfile_ngc ..

# Step 3: Build the final image
# This step builds the complete image, including installing the WHL file
# time DOCKER_BUILDKIT=1 docker build --progress=plain --platform linux/amd64 -t ant_lmdeploy:v0.6.1.3_cu123_$(date +"%Y%m%d") --build-arg LMDEPLOY_VERSION=0.6.1.3 -f Dockerfile_ngc ..
51 changes: 51 additions & 0 deletions docker/docker-compose-ngc-build-amd64-dist.yml
@@ -0,0 +1,51 @@
version: "3.9"
# FROM nvcr.io/nvidia/pytorch:24.02-py3

# Ubuntu 22.04 including Python 3.10
# NVIDIA CUDA 12.3.2
# NVIDIA cuBLAS 12.3.4.1
# NVIDIA cuDNN 9.0.0.306
# NVIDIA NCCL 2.19.4
# NVIDIA RAPIDS™ 23.12
# rdma-core 39.0
# NVIDIA HPC-X 2.16rc4
# OpenMPI 4.1.4+
# GDRCopy 2.3
# TensorBoard 2.9.0
# Nsight Compute 2023.3.1.1
# Nsight Systems 2023.4.1.97
# NVIDIA TensorRT™ 8.6.3
# Torch-TensorRT 2.2.0a0
# NVIDIA DALI® 1.34
# MAGMA 2.6.2
# JupyterLab 2.3.2 including Jupyter-TensorBoard
# TransformerEngine 1.3
# PyTorch quantization wheel 2.1.2
x-node-common:
&node-common
platform: linux/amd64
environment:
&node-common-env
TZ: Asia/Shanghai
LOG_LEVEL: INFO
CUDA_VISIBLE_DEVICES: 0
LMDEPLOY_VERSION: ${LMDEPLOY_VERSION:-0.6.1.1}
WRITE_WHL: "true" # 添加环境变量,默认值为 "true"
image: nvcr.io/nvidia/pytorch:24.02-py3
logging:
driver: json-file
options:
max-size: "100m"
max-file: "10"

services:
build-lmdeploy-whl-amd64-01:
<<: *node-common
container_name: build-lmdeploy-whl-amd64-01
volumes:
- ./../:/ant_lmdeploy
- ./lmdeploy_build:/lmdeploy_build
- ./docker_build_cache:/docker_build_cache
- ./../builder/manywheel/entrypoint_build_ngc.sh:/entrypoint_build.sh
entrypoint: sh /entrypoint_build.sh

7 changes: 7 additions & 0 deletions lmdeploy/cli/startup.py
@@ -0,0 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
from lmdeploy.patch.monkey_patch import patch_all; patch_all()
print("patching startup.py")
from .entrypoint import run

if __name__ == '__main__':
    run()
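
# Note on import order: patch_all() must run before .entrypoint is imported, because
# the CLI builds its argparse tree at import time, and only parsers created after
# _SubParsersAction.add_parser has been replaced pick up the extra flag. A toy,
# self-contained sketch of that ordering (standard-library names only; the subcommand
# layout is a simplified stand-in for lmdeploy's CLI):

from argparse import ArgumentParser, _SubParsersAction

_orig_add_parser = _SubParsersAction.add_parser

def add_parser_with_extra_flag(self, name, **kwargs):
    # Same idea as _patch_sub_parser_add_parser: extend only the api_server subcommand.
    parser = _orig_add_parser(self, name, **kwargs)
    if name == "api_server":
        parser.add_argument("--enable-stream-include-usage", action="store_true")
    return parser

# Install the patch first ...
_SubParsersAction.add_parser = add_parser_with_extra_flag

# ... then build the CLI (in lmdeploy this happens when .entrypoint is imported).
cli = ArgumentParser(prog="lmdeploy")
serve = cli.add_subparsers(dest="command").add_parser("serve")
serve.add_subparsers(dest="subcommand").add_parser("api_server")

args = cli.parse_args(["serve", "api_server", "--enable-stream-include-usage"])
assert args.enable_stream_include_usage is True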
Empty file added lmdeploy/patch/__init__.py
Empty file.
92 changes: 92 additions & 0 deletions lmdeploy/patch/monkey_patch.py
@@ -0,0 +1,92 @@
import logging

from argparse import _SubParsersAction, ArgumentParser
from functools import cache
from lmdeploy.cli.serve import SubCliServe
from lmdeploy.serve.openai import api_server as openai_api_serve
from lmdeploy.serve.openai.protocol import StreamOptions

logger = logging.getLogger(__name__)

class ApiServeCliContext:
    args = None


def _set_api_serve_cli_context(args):
    """Set the API serve CLI context with the provided arguments."""
    ApiServeCliContext.args = args


@cache
def get_stream_include_usage_status():
    """Get the status of whether to include stream usage data in the output."""
    try:
        return ApiServeCliContext.args.enable_stream_include_usage
    except:
        return False


_origin_parse_args = ArgumentParser.parse_args
def _patch_parse_args(self, args=None, namespace=None):
    """Patch the parse_args method to set the API serve CLI context if the command is 'serve'."""
    parser_args = _origin_parse_args(self, args=args, namespace=namespace)
    command = getattr(parser_args, "command", None)
    if command != "serve":
        return parser_args
    _set_api_serve_cli_context(parser_args)

    return parser_args


def get_api_serve_cli_context():
    """Get the API serve CLI context parser."""
    api_server_parser = SubCliServe.subparsers.choices['api_server']
    return api_server_parser


_origin_api_serve_check_request = openai_api_serve.check_request
def _patch_api_serve_check_request(request):
    """Patch the check_request method to include stream usage data if enabled."""
    enable_stream_include_usage_status = get_stream_include_usage_status()
    if enable_stream_include_usage_status:
        stream_options = getattr(request, "stream_options", None)
        if not stream_options:
            stream_options = StreamOptions(include_usage=True)
        request.stream_options = stream_options

    r = _origin_api_serve_check_request(request)
    return r


def _patch_api_server_add_parser(parser):
    """Patch the API server parser to add the --enable-stream-include-usage argument."""
    parser.add_argument(
        "--enable-stream-include-usage",
        action="store_true",
        help="Enable the inclusion of stream usage data in the output, useful for monitoring performance.",
    )
    return parser


def _patch_sub_parser_add_parser(self, name, **kwargs):
    """Patch the subparser add_parser method to include the --enable-stream-include-usage argument for 'api_server'."""
    _parser = self._origin_sub_parser_add_parser(name, **kwargs)

    if name != "api_server":
        return _parser
    logger.info("patching api_server add_parser")
    _parser = _patch_api_server_add_parser(_parser)

    return _parser


def patch_all():
    """Apply all monkey patches."""
    logger.info("monkey patching all")

    # Patch _SubParsersAction.add_parser
    _SubParsersAction._origin_sub_parser_add_parser = _SubParsersAction.add_parser
    _SubParsersAction.add_parser = _patch_sub_parser_add_parser

    # Patch openai_api_serve.check_request
    openai_api_serve.check_request = _patch_api_serve_check_request

    # Patch ArgumentParser.parse_args
    ArgumentParser.parse_args = _patch_parse_args

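# For context, the effect of the check_request patch above is to default stream_options
# to include_usage=True whenever the server was started with --enable-stream-include-usage
# and the client did not set stream_options itself. A minimal sketch of that behavior,
# using stand-in types (the real request and StreamOptions classes live in
# lmdeploy.serve.openai.protocol):

from dataclasses import dataclass
from typing import Optional

@dataclass
class StreamOptions:          # stand-in for lmdeploy's StreamOptions
    include_usage: bool = False

@dataclass
class ChatRequest:            # stand-in for a chat completion request
    stream: bool = True
    stream_options: Optional[StreamOptions] = None

def patched_check_request(request, flag_enabled=True):
    # Mirrors _patch_api_serve_check_request: inject stream usage reporting
    # only when the CLI flag is enabled and the client omitted stream_options.
    if flag_enabled and not getattr(request, "stream_options", None):
        request.stream_options = StreamOptions(include_usage=True)
    # ... the original check_request would run here ...

req = ChatRequest()
patched_check_request(req)
assert req.stream_options.include_usage is True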