diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index adadd11f69..608b27a564 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -29,7 +29,7 @@
 #
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.03-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.04-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_COMMON_REPO_TAG=main
diff --git a/README.md b/README.md
index 4701662bcb..f0bd69b866 100644
--- a/README.md
+++ b/README.md
@@ -32,8 +32,8 @@
 **LATEST RELEASE: You are currently on the main branch which tracks
 under-development progress towards the next release. The current release is
-version [2.32.0](https://github.com/triton-inference-server/server/tree/r23.03)
-and corresponds to the 23.03 container release on
+version [2.33.0](https://github.com/triton-inference-server/server/tree/r23.04)
+and corresponds to the 23.04 container release on
 [NVIDIA GPU Cloud (NGC)](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver).**
 
 ----
@@ -88,16 +88,16 @@ Inference Server with the
 ```bash
 # Step 1: Create the example model repository
-git clone -b r23.03 https://github.com/triton-inference-server/server.git
+git clone -b r23.04 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh
 
 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.03-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.04-py3 tritonserver --model-repository=/models
 
 # Step 3: Sending an Inference Request
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.03-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.04-py3-sdk
 /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
 
 # Inference should return the following
@@ -257,4 +257,4 @@ For questions, we recommend posting in our community
 
 ## For more information
 Please refer to the [NVIDIA Developer Triton page](https://developer.nvidia.com/nvidia-triton-inference-server)
-for more information.
+for more information.
\ No newline at end of file
diff --git a/TRITON_VERSION b/TRITON_VERSION
index 6f5d8feae3..1eb66e9313 100644
--- a/TRITON_VERSION
+++ b/TRITON_VERSION
@@ -1 +1 @@
-2.33.0dev
+2.34.0dev
diff --git a/build.py b/build.py
index 76a63556f7..9db2b84e22 100755
--- a/build.py
+++ b/build.py
@@ -67,9 +67,9 @@
 # incorrectly load the other version of the openvino libraries.
 #
 TRITON_VERSION_MAP = {
-    '2.33.0dev': (
-        '23.04dev',  # triton container
-        '23.03',  # upstream container
+    '2.34.0dev': (
+        '23.05dev',  # triton container
+        '23.04',  # upstream container
         '1.14.1',  # ORT
         '2022.1.0',  # ORT OpenVINO
         '2022.1.0',  # Standalone OpenVINO
diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml
index b78a85d57e..42256c571a 100644
--- a/deploy/aws/values.yaml
+++ b/deploy/aws/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.03-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.04-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml
index 35be1b3ecd..eb95ccc997 100644
--- a/deploy/fleetcommand/Chart.yaml
+++ b/deploy/fleetcommand/Chart.yaml
@@ -26,7 +26,7 @@
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.32.0"
+appVersion: "2.33.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart
diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml
index 3755a02a15..8aac03b247 100644
--- a/deploy/fleetcommand/values.yaml
+++ b/deploy/fleetcommand/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.03-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.04-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -46,13 +46,13 @@ image:
   # Model Control Mode (Optional, default: none)
   #
   # To set model control mode, uncomment and configure below
-  # See https://github.com/triton-inference-server/server/blob/r23.03/docs/model_management.md
+  # See https://github.com/triton-inference-server/server/blob/r23.04/docs/model_management.md
   # for more details
   #- --model-control-mode=explicit|poll|none
   #
   # Additional server args
   #
-  # see https://github.com/triton-inference-server/server/blob/r23.03/README.md
+  # see https://github.com/triton-inference-server/server/blob/r23.04/README.md
   # for more details
 
 service:
diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml
index 934f3e61de..424e964e4d 100644
--- a/deploy/gcp/values.yaml
+++ b/deploy/gcp/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.03-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.04-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1
diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
index 75f84a1194..89d032c3d6 100644
--- a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
+++ b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:23.03-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:23.04-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:
diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
index 007230899a..543c22a452 100644
--- a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
+++ b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
@@ -26,9 +26,9 @@ export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
 export APP_NAME=tritonserver
-export MAJOR_VERSION=2.32
-export MINOR_VERSION=2.32.0
-export NGC_VERSION=23.03-py3
+export MAJOR_VERSION=2.33
+export MINOR_VERSION=2.33.0
+export NGC_VERSION=23.04-py3
 
 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
index 3af4998fd6..b16ea3af48 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 apiVersion: v1
-appVersion: "2.32"
+appVersion: "2.33"
 description: Triton Inference Server
 name: triton-inference-server
-version: 2.32.0
+version: 2.33.0
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
index e4eee10064..54867eefc8 100644
--- a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
@@ -31,14 +31,14 @@ maxReplicaCount: 3
 tritonProtocol: HTTP
 # HPA GPU utilization autoscaling target
 HPATargetAverageValue: 85
-modelRepositoryPath: gs://triton_sample_models/23_02
-publishedVersion: '2.31.0'
+modelRepositoryPath: gs://triton_sample_models/23_04
+publishedVersion: '2.33.0'
 gcpMarketplace: true
 
 image:
   registry: gcr.io
   repository: nvidia-ngc-public/tritonserver
-  tag: 23.03-py3
+  tag: 23.04-py3
   pullPolicy: IfNotPresent
 # modify the model repository here to match your GCP storage bucket
 numGpus: 1
diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
index 05900d5c63..7039d263b1 100644
--- a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.32.0'
+  publishedVersion: '2.33.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml
index 18707851ff..c1cac2e2aa 100644
--- a/deploy/gke-marketplace-app/server-deployer/schema.yaml
+++ b/deploy/gke-marketplace-app/server-deployer/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.32.0'
+  publishedVersion: '2.33.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.
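The Helm and Marketplace manifests above have to move in lockstep: the chart `version`/`appVersion`, the `publishedVersion` in both schema files, and the NGC image tag all encode the same release. As a quick spot check, one might render the charts offline after a bump like this; the sketch below assumes Helm 3, the `deploy/` layout shown in this diff, and an arbitrary release name `triton-check`.

```bash
# Render each chart locally and list the image references it would deploy;
# after this patch every reported line should point at a 23.04 tritonserver image.
for chart in deploy/aws deploy/gcp deploy/fleetcommand; do
  echo "== ${chart} =="
  helm template triton-check "${chart}" | grep -E 'image(Name)?:'
done
```

This is only a rendering check; it does not validate that the `publishedVersion` values in the GKE deployer schemas match the chart, which still needs to be compared by eye.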
diff --git a/deploy/gke-marketplace-app/trt-engine/README.md b/deploy/gke-marketplace-app/trt-engine/README.md
index 717ce84221..8e412094f4 100644
--- a/deploy/gke-marketplace-app/trt-engine/README.md
+++ b/deploy/gke-marketplace-app/trt-engine/README.md
@@ -33,7 +33,7 @@
 ```
 docker run --gpus all -it --network host \
   --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-  -v ~:/scripts nvcr.io/nvidia/tensorrt:23.02-py3
+  -v ~:/scripts nvcr.io/nvidia/tensorrt:23.04-py3
 
 pip install onnx six torch tf2onnx tensorflow
diff --git a/deploy/k8s-onprem/values.yaml b/deploy/k8s-onprem/values.yaml
index 6e9e9e142f..b465b2a3ce 100644
--- a/deploy/k8s-onprem/values.yaml
+++ b/deploy/k8s-onprem/values.yaml
@@ -29,7 +29,7 @@ tags:
   loadBalancing: true
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:21.10-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.04-py3
   pullPolicy: IfNotPresent
   modelRepositoryServer: < Replace with the IP Address of your file server >
   modelRepositoryPath: /srv/models
diff --git a/docs/customization_guide/build.md b/docs/customization_guide/build.md
index f8349ebdba..febc5dfe46 100644
--- a/docs/customization_guide/build.md
+++ b/docs/customization_guide/build.md
@@ -173,7 +173,7 @@ $ ./build.py ... --repo-tag=common:<container tag> --repo-tag=core:<container t
 
 `<container tag>` will default to the branch name. For example, if you are building on the
-r23.03 branch, `<container tag>` will default to r23.03. If you are
+r23.04 branch, `<container tag>` will default to r23.04. If you are
 building on any other branch (including the *main* branch) then
 `<container tag>` will default to "main". Therefore, you typically do
 not need to provide `<container tag>` at all (nor the preceding
@@ -334,8 +334,8 @@ python build.py --cmake-dir=<path to repo>/build --build-dir=/tmp/citritonbuild
 If you are building on *main* branch then '<container tag>' will
 default to "main". If you are building on a release branch then
 '<container tag>' will default to the branch name. For example, if you
-are building on the r23.03 branch, '<container tag>' will default to
-r23.03. Therefore, you typically do not need to provide '<container
+are building on the r23.04 branch, '<container tag>' will default to
+r23.04. Therefore, you typically do not need to provide '<container
 tag>' at all (nor the preceding colon). You can use a different
 '<container tag>' for a component to instead use the corresponding
 branch/tag in the build. For example, if you have a branch called
diff --git a/docs/customization_guide/compose.md b/docs/customization_guide/compose.md
index b0bf0e6d0b..bd30ad2cf4 100644
--- a/docs/customization_guide/compose.md
+++ b/docs/customization_guide/compose.md
@@ -44,8 +44,8 @@ from source to get more exact customization.
 The `compose.py` script can be found in the [server repository](https://github.com/triton-inference-server/server).
 Simply clone the repository and run `compose.py` to create a custom container.
 Note: Created container version will depend on the branch that was cloned.
-For example branch [r23.03](https://github.com/triton-inference-server/server/tree/r23.03)
-should be used to create a image based on the NGC 23.03 Triton release.
+For example branch [r23.04](https://github.com/triton-inference-server/server/tree/r23.04)
+should be used to create an image based on the NGC 23.04 Triton release.
 
 `compose.py` provides `--backend`, `--repoagent` options that allow you
 to specify which backends and repository agents to include in the custom image.
@@ -62,7 +62,7 @@ will provide a container `tritonserver` locally. You can access the container wi
 ```
 $ docker run -it tritonserver:latest
 ```
 
-Note: If `compose.py` is run on release versions `r23.03` and earlier,
+Note: If `compose.py` is run on release versions `r21.08` and earlier,
 the resulting container will have DCGM version 2.2.3 installed.
 This may result in different GPU statistic reporting behavior.
@@ -76,19 +76,19 @@ For example, running
 ```
 python3 compose.py --backend tensorflow1 --repoagent checksum
 ```
-on branch [r23.03](https://github.com/triton-inference-server/server/tree/r23.03) pulls:
-- `min` container `nvcr.io/nvidia/tritonserver:23.03-py3-min`
-- `full` container `nvcr.io/nvidia/tritonserver:23.03-py3`
+on branch [r23.04](https://github.com/triton-inference-server/server/tree/r23.04) pulls:
+- `min` container `nvcr.io/nvidia/tritonserver:23.04-py3-min`
+- `full` container `nvcr.io/nvidia/tritonserver:23.04-py3`
 
 Alternatively, users can specify the version of Triton container
 to pull from any branch by either:
 1. Adding flag `--container-version <container version>` to branch
 ```
-python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 23.03
+python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 23.04
 ```
 2. Specifying `--image min,<min container image> --image full,<full container image>`.
 The user is responsible for specifying compatible `min` and `full` containers.
 ```
-python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:23.03-py3-min --image full,nvcr.io/nvidia/tritonserver:23.03-py3
+python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:23.04-py3-min --image full,nvcr.io/nvidia/tritonserver:23.04-py3
 ```
 Method 1 and 2 will result in the same composed container. Furthermore,
 `--image` flag overrides the `--container-version` flag when both are specified.
diff --git a/docs/customization_guide/test.md b/docs/customization_guide/test.md
index 4d47557f78..71d54065d4 100644
--- a/docs/customization_guide/test.md
+++ b/docs/customization_guide/test.md
@@ -49,7 +49,7 @@ $ ./gen_qa_custom_ops
 ```
 
 This will create multiple model repositories in /tmp/<version>/qa_*
-(for example /tmp/23.03/qa_model_repository). The TensorRT models
+(for example /tmp/23.04/qa_model_repository). The TensorRT models
 will be created for the GPU on the system that CUDA considers device
 0 (zero). If you have multiple GPUs on your system see the
 documentation in the scripts for how to target a specific GPU.
diff --git a/docs/index.md b/docs/index.md
index 5694fc3af8..bff31c78eb 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -94,4 +94,4 @@ Join the [Triton and TensorRT community](https://www.nvidia.com/en-us/deep-learn
 practices, and more. Need enterprise support? NVIDIA global support is available for Triton
 Inference Server with the [NVIDIA AI Enterprise software suite](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/).
 
-See the [Lastest Release Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/rel-23-03.html#rel-23-03) for updates on the newest features and bug fixes.
+See the [Latest Release Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/rel-23-04.html#rel-23-04) for updates on the newest features and bug fixes.
diff --git a/docs/user_guide/custom_operations.md b/docs/user_guide/custom_operations.md
index f4822abd8b..5d949d2e01 100644
--- a/docs/user_guide/custom_operations.md
+++ b/docs/user_guide/custom_operations.md
@@ -64,7 +64,7 @@ simple way to ensure you are using the correct version of TensorRT
 is to use the [NGC TensorRT
 container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt)
 corresponding to the Triton container. For example, if you are using
-the 23.03 version of Triton, use the 23.03 version of the TensorRT
+the 23.04 version of Triton, use the 23.04 version of the TensorRT
 container.
 
 ## TensorFlow
@@ -123,7 +123,7 @@ simple way to ensure you are using the correct version of TensorFlow
 is to use the [NGC TensorFlow
 container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow)
 corresponding to the Triton container. For example, if you are using
-the 23.03 version of Triton, use the 23.03 version of the TensorFlow
+the 23.04 version of Triton, use the 23.04 version of the TensorFlow
 container.
 
 ## PyTorch
@@ -167,7 +167,7 @@ simple way to ensure you are using the correct version of PyTorch
 is to use the [NGC PyTorch
 container](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch)
 corresponding to the Triton container. For example, if you are using
-the 23.03 version of Triton, use the 23.03 version of the PyTorch
+the 23.04 version of Triton, use the 23.04 version of the PyTorch
 container.
 
 ## ONNX
diff --git a/docs/user_guide/performance_tuning.md b/docs/user_guide/performance_tuning.md
index 1020b24ecb..fbf5d01706 100644
--- a/docs/user_guide/performance_tuning.md
+++ b/docs/user_guide/performance_tuning.md
@@ -157,10 +157,10 @@ These containers can be started interactively instead, but for the sake of demon
 
 ```bash
 # Start server container in the background
-docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:23.03-py3
+docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:23.04-py3
 
 # Start client container in the background
-docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:23.03-py3-sdk
+docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:23.04-py3-sdk
 ```
 
 > **Note**
diff --git a/qa/common/gen_jetson_trt_models b/qa/common/gen_jetson_trt_models
index d3498920f4..f35b634a69 100755
--- a/qa/common/gen_jetson_trt_models
+++ b/qa/common/gen_jetson_trt_models
@@ -50,7 +50,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=23.03}
+TRITON_VERSION=${TRITON_VERSION:=23.04}
 CUDA_DEVICE=${RUNNER_ID:=0}
 HOST_BUILD_DIR=${HOST_BUILD_DIR:=/tmp}
diff --git a/qa/common/gen_qa_custom_ops b/qa/common/gen_qa_custom_ops
index 7e493987c9..a934e3adad 100755
--- a/qa/common/gen_qa_custom_ops
+++ b/qa/common/gen_qa_custom_ops
@@ -37,7 +37,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=23.03}
+TRITON_VERSION=${TRITON_VERSION:=23.04}
 NVIDIA_UPSTREAM_VERSION=${NVIDIA_UPSTREAM_VERSION:=$TRITON_VERSION}
 TENSORFLOW_IMAGE=${TENSORFLOW_IMAGE:=nvcr.io/nvidia/tensorflow:$NVIDIA_UPSTREAM_VERSION-tf2-py3}
 PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$NVIDIA_UPSTREAM_VERSION-py3}
diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
index cab7776fe0..934853aed0 100755
--- a/qa/common/gen_qa_model_repository
+++ b/qa/common/gen_qa_model_repository
@@ -48,7 +48,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=23.03}
+TRITON_VERSION=${TRITON_VERSION:=23.04}
 
 # ONNX. Use ONNX_OPSET 0 to use the default for ONNX version
 ONNX_VERSION=1.10.1
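A bump of this size touches deployment manifests, docs, and QA scripts alike, so a final sweep for stale version strings is cheap insurance. The sketch below is not part of this patch: the strings searched for come from the diff above, and the expectation that the intentional `r21.08` DCGM note in `compose.md` is the only remaining old-release reference is an assumption about this repo's docs.

```bash
#!/usr/bin/env bash
# Post-bump check: report any remaining references to the pre-23.04 release
# strings. Apart from intentionally historical notes, these should all be gone.
set -euo pipefail

for old in '23.03' 'r23.03' '2.32.0' '2.33.0dev'; do
  echo "== remaining references to ${old} =="
  grep -rnF --exclude-dir=.git "${old}" . || echo "(none)"
done
```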