From b925952c92be50caaa8e42527df5a85fdb9094a8 Mon Sep 17 00:00:00 2001
From: agunapal
Date: Wed, 28 Jun 2023 19:13:29 +0000
Subject: [PATCH 1/8] Update instructions to build with nvidia cuda runtime image for docker

---
 docker/README.md          | 16 ++++++++++++++--
 docker/build_image.sh     | 14 ++++++++++++++
 docs/performance_guide.md |  4 ++++
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/docker/README.md b/docker/README.md
index fc65749532..a94df11fb9 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -34,6 +34,7 @@ Use `build_image.sh` script to build the docker images. The script builds the `p
|-h, --help|Show script help|
|-b, --branch_name|Specify a branch name to use. Default: master |
|-g, --gpu|Build image with GPU based ubuntu base image|
+|-bi, --baseimage|Specify base docker image. Example: nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04|
|-bt, --buildtype|Which type of docker image to build. Can be one of : production, dev, codebuild|
|-t, --tag|Tag name for image. If not specified, script uses torchserve default tag names.|
|-cv, --cudaversion| Specify to cuda version to use. Supported values `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`. Default `cu117`|
@@ -52,10 +53,12 @@ Creates a docker image with publicly available `torchserve` and `torch-model-arc
```bash
./build_image.sh
```

- - To create a GPU based image with cuda 10.2. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`
+ - To create a GPU based image with a specific CUDA version. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`
+
+ - GPU images are built with NVIDIA CUDA base image. If you want to use ONNX, DeepSpeed, TensorRT, please specify the base image as shown in the next section.

```bash
-  ./build_image.sh -g -cv cu102
+  ./build_image.sh -g -cv cu117
```

 - To create an image with a custom tag

```bash
./build_image.sh -t torchserve:1.0
```

+**NVIDIA CUDA RUNTIME BASE IMAGE**
+
+To make use of ONNX, TensorRT, DeepSpeed, we need to use [NVIDIA CUDA runtime](https://github.com/NVIDIA/nvidia-docker/wiki/CUDA) as the base image.
+This will increase the size of your Docker image.
+
+```bash
+ ./build_image.sh -bi nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04 -g -cv cu117
+ ```
+
**DEVELOPER ENVIRONMENT IMAGES**

Creates a docker image with `torchserve` and `torch-model-archiver` installed from source.

diff --git a/docker/build_image.sh b/docker/build_image.sh
index 7ced5049bf..79853909a3 100755
--- a/docker/build_image.sh
+++ b/docker/build_image.sh
@@ -7,6 +7,8 @@ BRANCH_NAME="master"
DOCKER_TAG="pytorch/torchserve:latest-cpu"
BUILD_TYPE="production"
BASE_IMAGE="ubuntu:20.04"
+USER_BASE_IMAGE="ubuntu:20.04"
+UPDATE_BASE_IMAGE=false
USE_CUSTOM_TAG=false
CUDA_VERSION=""
USE_LOCAL_SERVE_FOLDER=false
@@ -21,6 +23,7 @@ do
          echo "-h, --help show brief help"
          echo "-b, --branch_name=BRANCH_NAME specify a branch_name to use"
          echo "-g, --gpu specify to use gpu"
+          echo "-bi, --baseimage specify base docker image. Example: nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04"
          echo "-bt, --buildtype specify to created image for codebuild. Possible values: production, dev, codebuild."
echo "-cv, --cudaversion specify to cuda version to use" echo "-t, --tag specify tag name for docker image" @@ -47,6 +50,12 @@ do CUDA_VERSION="cu117" shift ;; + -bi|--baseimage) + USER_BASE_IMAGE="$2" + UPDATE_BASE_IMAGE=true + shift + shift + ;; -bt|--buildtype) BUILD_TYPE="$2" shift @@ -135,6 +144,11 @@ then DOCKER_TAG=${CUSTOM_TAG} fi +if [ "$UPDATE_BASE_IMAGE" = true ] +then + BASE_IMAGE=${USER_BASE_IMAGE} +fi + if [ "${BUILD_TYPE}" == "production" ] then DOCKER_BUILDKIT=1 docker build --file Dockerfile --build-arg BASE_IMAGE="${BASE_IMAGE}" --build-arg CUDA_VERSION="${CUDA_VERSION}" --build-arg PYTHON_VERSION="${PYTHON_VERSION}" -t "${DOCKER_TAG}" . diff --git a/docs/performance_guide.md b/docs/performance_guide.md index a461a5889e..158f05dfca 100644 --- a/docs/performance_guide.md +++ b/docs/performance_guide.md @@ -16,12 +16,16 @@ At a high level what TorchServe allows you to do is 2. Load those weights from `base_handler.py` using `ort_session = ort.InferenceSession(self.model_pt_path, providers=providers, sess_options=sess_options)` which supports reasonable defaults for both CPU and GPU inference 3. Allow you define custom pre and post processing functions to pass in data in the format your onnx model expects with a custom handler +To use ONNX with GPU on TorchServe Docker, we need to build an image with [NVIDIA CUDA runtime](https://github.com/NVIDIA/nvidia-docker/wiki/CUDA) as the base image as show [here](https://github.com/pytorch/serve/blob/master/docker/README.md#create-torchserve-docker-image) +

TensorRT

TorchServe also supports models optimized via TensorRT. To leverage the TensorRT runtime you can convert your model by [following these instructions](https://github.com/pytorch/TensorRT) and once you're done you'll have serialized weights which you can load with [`torch.jit.load()`](https://pytorch.org/TensorRT/getting_started/getting_started_with_python_api.html#getting-started-with-python-api).

After a conversion there is no difference in how PyTorch treats a Torchscript model vs a TensorRT model.

+To use ONNX with GPU on TorchServe Docker, we need to build an image with [NVIDIA CUDA runtime](https://github.com/NVIDIA/nvidia-docker/wiki/CUDA) as the base image as show [here](https://github.com/pytorch/serve/blob/master/docker/README.md#create-torchserve-docker-image)
+

Better Transformer

Better Transformer from PyTorch implements a backwards-compatible fast path of `torch.nn.TransformerEncoder` for Transformer Encoder Inference and does not require model authors to modify their models. BetterTransformer improvements can exceed 2x in speedup and throughput for many common execution scenarios.

From 8898cc9c8c51c2bfcb99bead45bc5f5575380aa3 Mon Sep 17 00:00:00 2001
From: agunapal
Date: Wed, 28 Jun 2023 21:12:26 +0000
Subject: [PATCH 2/8] updated deepspeed documentation

---
 docs/performance_guide.md                 | 3 +--
 examples/large_models/deepspeed/Readme.md | 4 ++++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/docs/performance_guide.md b/docs/performance_guide.md
index 158f05dfca..6afd6b80b0 100644
--- a/docs/performance_guide.md
+++ b/docs/performance_guide.md
@@ -16,7 +16,7 @@ At a high level what TorchServe allows you to do is
2. Load those weights from `base_handler.py` using `ort_session = ort.InferenceSession(self.model_pt_path, providers=providers, sess_options=sess_options)` which supports reasonable defaults for both CPU and GPU inference
3. Allow you to define custom pre and post processing functions to pass in data in the format your onnx model expects with a custom handler

-To use ONNX with GPU on TorchServe Docker, we need to build an image with [NVIDIA CUDA runtime](https://github.com/NVIDIA/nvidia-docker/wiki/CUDA) as the base image as show [here](https://github.com/pytorch/serve/blob/master/docker/README.md#create-torchserve-docker-image)
+To use ONNX with GPU on TorchServe Docker, we need to build an image with [NVIDIA CUDA runtime](https://github.com/NVIDIA/nvidia-docker/wiki/CUDA) as the base image as shown [here](https://github.com/pytorch/serve/blob/master/docker/README.md#create-torchserve-docker-image)

TensorRT

@@ -24,7 +24,6 @@ TorchServe also supports models optimized via TensorRT. To leverage the TensorRT

After a conversion there is no difference in how PyTorch treats a Torchscript model vs a TensorRT model.

-To use ONNX with GPU on TorchServe Docker, we need to build an image with [NVIDIA CUDA runtime](https://github.com/NVIDIA/nvidia-docker/wiki/CUDA) as the base image as show [here](https://github.com/pytorch/serve/blob/master/docker/README.md#create-torchserve-docker-image)

Better Transformer

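Aside: a minimal end-to-end sketch of the workflow the ONNX note above describes, using the `-bi` syntax introduced in PATCH 1. The tag `torchserve:onnx-gpu` and the contents of `model_store` are illustrative assumptions, not artifacts of this series:

```bash
# Build TorchServe on the CUDA runtime base image; the runtime flavor ships the
# shared CUDA/cuDNN libraries that onnxruntime's GPU execution provider loads.
./build_image.sh -bi nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04 -g -cv cu117 -t torchserve:onnx-gpu

# Run the image with GPU access; model_store is assumed to hold a .mar archive
# of an ONNX model produced with torch-model-archiver.
docker run --rm -it --gpus all -p 8080:8080 -p 8081:8081 \
    -v $(pwd)/model_store:/home/model-server/model-store \
    torchserve:onnx-gpu
```

The runtime flavor suffices here because ONNX inference only loads prebuilt libraries; DeepSpeed, by contrast, JIT-compiles CUDA ops and therefore needs the devel flavor used later in this series.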
diff --git a/examples/large_models/deepspeed/Readme.md b/examples/large_models/deepspeed/Readme.md
index 4347e2758d..690730d500 100644
--- a/examples/large_models/deepspeed/Readme.md
+++ b/examples/large_models/deepspeed/Readme.md
@@ -44,3 +44,7 @@ torchserve --start --ncs --model-store model_store --models opt.tar.gz
```bash
curl "http://localhost:8080/predictions/opt" -T sample_text.txt
```
+
+### Running using TorchServe Docker Image
+
+To use DeepSpeed with GPU on TorchServe Docker, we need to build an image with [NVIDIA CUDA dev](https://github.com/NVIDIA/nvidia-docker/wiki/CUDA) as the base image as shown [here](https://github.com/pytorch/serve/blob/master/docker/README.md#create-torchserve-docker-image)
\ No newline at end of file

From 7ff353b045a458e6da5d94f83ca05337fcab6655 Mon Sep 17 00:00:00 2001
From: agunapal
Date: Wed, 28 Jun 2023 21:14:03 +0000
Subject: [PATCH 3/8] updated deepspeed documentation

---
 docs/performance_guide.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/performance_guide.md b/docs/performance_guide.md
index 6afd6b80b0..85a903b442 100644
--- a/docs/performance_guide.md
+++ b/docs/performance_guide.md
@@ -24,7 +24,6 @@ TorchServe also supports models optimized via TensorRT. To leverage the TensorRT

After a conversion there is no difference in how PyTorch treats a Torchscript model vs a TensorRT model.

-

Better Transformer

Better Transformer from PyTorch implements a backwards-compatible fast path of `torch.nn.TransformerEncoder` for Transformer Encoder Inference and does not require model authors to modify their models. BetterTransformer improvements can exceed 2x in speedup and throughput for many common execution scenarios.

From 7a488e7882d233d2e8d7e6ea4699d68b9c3d8cb7 Mon Sep 17 00:00:00 2001
From: agunapal
Date: Wed, 28 Jun 2023 21:17:50 +0000
Subject: [PATCH 4/8] updated deepspeed documentation

---
 docker/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/README.md b/docker/README.md
index a94df11fb9..82c0143983 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -55,7 +55,7 @@ Creates a docker image with publicly available `torchserve` and `torch-model-arc
 - To create a GPU based image with a specific CUDA version. Options are `cu92`, `cu101`, `cu102`, `cu111`, `cu113`, `cu116`, `cu117`, `cu118`

- - GPU images are built with NVIDIA CUDA base image. If you want to use ONNX, DeepSpeed, TensorRT, please specify the base image as shown in the next section.
+ - GPU images are built with NVIDIA CUDA base image. If you want to use ONNX, please specify the base image as shown in the next section.

```bash
 ./build_image.sh -g -cv cu117
 ```
@@ -69,7 +69,7 @@ Creates a docker image with publicly available `torchserve` and `torch-model-arc
**NVIDIA CUDA RUNTIME BASE IMAGE**

-To make use of ONNX, TensorRT, DeepSpeed, we need to use [NVIDIA CUDA runtime](https://github.com/NVIDIA/nvidia-docker/wiki/CUDA) as the base image.
+To make use of ONNX, we need to use [NVIDIA CUDA runtime](https://github.com/NVIDIA/nvidia-docker/wiki/CUDA) as the base image.
This will increase the size of your Docker image.

```bash

From fce9d592cee730227bfb7010889c5108469f6e92 Mon Sep 17 00:00:00 2001
From: agunapal
Date: Wed, 28 Jun 2023 21:47:00 +0000
Subject: [PATCH 5/8] added example command

---
 examples/large_models/deepspeed/Readme.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/examples/large_models/deepspeed/Readme.md b/examples/large_models/deepspeed/Readme.md
index 690730d500..13ab6f40f4 100644
--- a/examples/large_models/deepspeed/Readme.md
+++ b/examples/large_models/deepspeed/Readme.md
@@ -47,4 +47,9 @@ curl "http://localhost:8080/predictions/opt" -T sample_text.txt

### Running using TorchServe Docker Image

-To use DeepSpeed with GPU on TorchServe Docker, we need to build an image with [NVIDIA CUDA dev](https://github.com/NVIDIA/nvidia-docker/wiki/CUDA) as the base image as shown [here](https://github.com/pytorch/serve/blob/master/docker/README.md#create-torchserve-docker-image)
\ No newline at end of file
+To use DeepSpeed with GPU on TorchServe Docker, we need to build an image with [NVIDIA CUDA dev](https://github.com/NVIDIA/nvidia-docker/wiki/CUDA) as the base image as shown [here](https://github.com/pytorch/serve/blob/master/docker/README.md#create-torchserve-docker-image)
+
+Example:
+```
+./build_image.sh -bi nvidia/cuda:11.7.0-devel-ubuntu20.04 -g -cv cu117 -t pytorch/torchserve:latest-gpu
+```

From b25e98563fe3000f89f7d9653d67e734fb5b4df0 Mon Sep 17 00:00:00 2001
From: agunapal
Date: Mon, 17 Jul 2023 17:42:30 +0000
Subject: [PATCH 6/8] Lint failure

---
 .github/workflows/regression_tests_docker.yml | 5 ++---
 ts_scripts/spellcheck_conf/wordlist.txt       | 1 +
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/regression_tests_docker.yml b/.github/workflows/regression_tests_docker.yml
index afb2758593..4f952b2382 100644
--- a/.github/workflows/regression_tests_docker.yml
+++ b/.github/workflows/regression_tests_docker.yml
@@ -1,10 +1,9 @@
name: Run Regression Tests on Docker

on:
-  push
  # run every day at 5:15am
-  #schedule:
-  # - cron: '15 5 * * *'
+  schedule:
+    - cron: '15 5 * * *'

concurrency:
  group: ci-cpu-${{ github.workflow }}-${{ github.ref == 'refs/heads/master' && github.run_number || github.ref }}

diff --git a/ts_scripts/spellcheck_conf/wordlist.txt b/ts_scripts/spellcheck_conf/wordlist.txt
index f6474c9370..b74c47fc5e 100644
--- a/ts_scripts/spellcheck_conf/wordlist.txt
+++ b/ts_scripts/spellcheck_conf/wordlist.txt
@@ -1064,3 +1064,4 @@ ActionSLAM
statins
ci
chatGPT
+baseimage

From b4df63eb2d10f9c5c00fdee878e976ce67a030cc Mon Sep 17 00:00:00 2001
From: agunapal
Date: Tue, 25 Jul 2023 20:44:15 +0000
Subject: [PATCH 7/8] changed variable name

---
 docker/build_image.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docker/build_image.sh b/docker/build_image.sh
index bacdc124fb..c39a357c96 100755
--- a/docker/build_image.sh
+++ b/docker/build_image.sh
@@ -7,7 +7,7 @@ BRANCH_NAME="master"
DOCKER_TAG="pytorch/torchserve:latest-cpu"
BUILD_TYPE="production"
BASE_IMAGE="ubuntu:20.04"
-USER_BASE_IMAGE="ubuntu:20.04"
+OVERRIDE_BASE_IMAGE="ubuntu:20.04"
UPDATE_BASE_IMAGE=false
USE_CUSTOM_TAG=false
CUDA_VERSION=""
@@ -53,7 +53,7 @@ do
        shift
        ;;
      -bi|--baseimage)
-        USER_BASE_IMAGE="$2"
+        OVERRIDE_BASE_IMAGE="$2"
        UPDATE_BASE_IMAGE=true
        shift
        shift
@@ -152,7 +152,7 @@ fi
if [ "$UPDATE_BASE_IMAGE" = true ]
then
-    BASE_IMAGE=${USER_BASE_IMAGE}
+    BASE_IMAGE=${OVERRIDE_BASE_IMAGE}
fi

if [ "${BUILD_TYPE}" == "production" ]

From f8a37b651da18129758340ed734478fd6730615f Mon Sep 17 00:00:00 2001
From: agunapal
Date: Wed, 26 Jul 2023 18:47:12 +0000
Subject: [PATCH 8/8] Exit if -bi and -g are specified

---
 docker/build_image.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docker/build_image.sh b/docker/build_image.sh
index c39a357c96..591c2244e0 100755
--- a/docker/build_image.sh
+++ b/docker/build_image.sh
@@ -7,7 +7,6 @@ BRANCH_NAME="master"
DOCKER_TAG="pytorch/torchserve:latest-cpu"
BUILD_TYPE="production"
BASE_IMAGE="ubuntu:20.04"
-OVERRIDE_BASE_IMAGE="ubuntu:20.04"
UPDATE_BASE_IMAGE=false
USE_CUSTOM_TAG=false
CUDA_VERSION=""
@@ -53,7 +52,7 @@ do
        shift
        ;;
      -bi|--baseimage)
-        OVERRIDE_BASE_IMAGE="$2"
+        BASE_IMAGE="$2"
        UPDATE_BASE_IMAGE=true
        shift
        shift
@@ -150,9 +149,10 @@ then
    DOCKER_TAG=${CUSTOM_TAG}
fi

-if [ "$UPDATE_BASE_IMAGE" = true ]
+if [[ $UPDATE_BASE_IMAGE == true && $MACHINE == "gpu" ]];
then
-    BASE_IMAGE=${OVERRIDE_BASE_IMAGE}
+    echo "Incompatible options: -bi doesn't work with -g option"
+    exit 1
fi

if [ "${BUILD_TYPE}" == "production" ]
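
Taken together, PATCH 7 and PATCH 8 change the `-bi` contract: the override now writes `BASE_IMAGE` directly, and combining it with `-g` is rejected outright. A sketch of the resulting CLI behavior, assuming `-cv` is still honored without `-g` (the tag name is illustrative):

```bash
# Accepted after PATCH 8: override the base image without -g
./build_image.sh -bi nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04 -cv cu117 -t torchserve:onnx-gpu

# Rejected after PATCH 8: -bi combined with -g
./build_image.sh -bi nvidia/cuda:11.7.0-cudnn8-runtime-ubuntu20.04 -g -cv cu117
# prints "Incompatible options: -bi doesn't work with -g option" and exits with status 1
```

Note that the examples added to docker/README.md (PATCH 1) and the DeepSpeed Readme (PATCH 5) still combine `-bi` with `-g`, so under PATCH 8 those documented commands would trip this check and presumably need a follow-up.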