Update README and docs post-23.04 release (#5707)

* Update README and versions for 23.04 branch
* Update README and docs post-23.04 release
* Update 'main' to track development for 2.34.0 / r23.05
* Update version references to 23.04
* Fix URL address

---------

Co-authored-by: Tanmay Verma <tanmay2592@gmail.com>
triton-inference-server · Apr 27, 2023 · ec957f4 · nnshah1 · Apr 27, 2023 · ec957f4
1 parent a186b44
commit ec957f4
Show file tree

Hide file tree

Showing 25 changed files with 51 additions and 51 deletions.
diff --git a/Dockerfile.sdk b/Dockerfile.sdk
@@ -29,7 +29,7 @@
 #
 
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.03-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:23.04-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_COMMON_REPO_TAG=main

diff --git a/README.md b/README.md
@@ -32,8 +32,8 @@
 
 **LATEST RELEASE: You are currently on the main branch which tracks
 under-development progress towards the next release. The current release is 
-version [2.32.0](https://github.com/triton-inference-server/server/tree/r23.03)
-and corresponds to the 23.03 container release on 
+version [2.33.0](https://github.com/triton-inference-server/server/tree/r23.04)
+and corresponds to the 23.04 container release on 
 [NVIDIA GPU Cloud (NGC)](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tritonserver).**
 
 ----
@@ -88,16 +88,16 @@ Inference Server with the
 
 ```bash
 # Step 1: Create the example model repository 
-git clone -b r23.03 https://github.com/triton-inference-server/server.git
+git clone -b r23.04 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh
 
 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.03-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:23.04-py3 tritonserver --model-repository=/models
 
 # Step 3: Sending an Inference Request 
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.03-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:23.04-py3-sdk
 /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
 
 # Inference should return the following
@@ -257,4 +257,4 @@ For questions, we recommend posting in our community
 ## For more information
 
 Please refer to the [NVIDIA Developer Triton page](https://developer.nvidia.com/nvidia-triton-inference-server)
-for more information.
+for more information.
diff --git a/TRITON_VERSION b/TRITON_VERSION
@@ -1 +1 @@
-2.33.0dev
+2.34.0dev
diff --git a/build.py b/build.py
@@ -67,9 +67,9 @@
 # incorrectly load the other version of the openvino libraries.
 #
 TRITON_VERSION_MAP = {
-    '2.33.0dev': (
-        '23.04dev',  # triton container
-        '23.03',  # upstream container
+    '2.34.0dev': (
+        '23.05dev',  # triton container
+        '23.04',  # upstream container
         '1.14.1',  # ORT
         '2022.1.0',  # ORT OpenVINO
         '2022.1.0',  # Standalone OpenVINO

diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.03-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.04-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1

diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml
@@ -26,7 +26,7 @@
 
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.32.0"
+appVersion: "2.33.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart

diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.03-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.04-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -46,13 +46,13 @@ image:
     # Model Control Mode (Optional, default: none)
     #
     # To set model control mode, uncomment and configure below
-    # See https://github.com/triton-inference-server/server/blob/r23.03/docs/model_management.md
+    # See https://github.com/triton-inference-server/server/blob/r23.04/docs/model_management.md
     #  for more details
     #- --model-control-mode=explicit|poll|none
     #
     # Additional server args
     #
-    # see https://github.com/triton-inference-server/server/blob/r23.03/README.md
+    # see https://github.com/triton-inference-server/server/blob/r23.04/README.md
     #  for more details
 
 service:

diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:23.03-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.04-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1

diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:23.03-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:23.04-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:

diff --git a/deploy/gke-marketplace-app/server-deployer/build_and_push.sh b/deploy/gke-marketplace-app/server-deployer/build_and_push.sh
@@ -26,9 +26,9 @@
 
 export REGISTRY=gcr.io/$(gcloud config get-value project | tr ':' '/')
 export APP_NAME=tritonserver
-export MAJOR_VERSION=2.32
-export MINOR_VERSION=2.32.0
-export NGC_VERSION=23.03-py3
+export MAJOR_VERSION=2.33
+export MINOR_VERSION=2.33.0
+export NGC_VERSION=23.04-py3
 
 docker pull nvcr.io/nvidia/$APP_NAME:$NGC_VERSION
 

diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/Chart.yaml
@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 apiVersion: v1
-appVersion: "2.32"
+appVersion: "2.33"
 description: Triton Inference Server
 name: triton-inference-server
-version: 2.32.0
+version: 2.33.0
diff --git a/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml b/deploy/gke-marketplace-app/server-deployer/chart/triton/values.yaml
@@ -31,14 +31,14 @@ maxReplicaCount: 3
 tritonProtocol: HTTP
 # HPA GPU utilization autoscaling target
 HPATargetAverageValue: 85
-modelRepositoryPath: gs://triton_sample_models/23_02
-publishedVersion: '2.31.0'
+modelRepositoryPath: gs://triton_sample_models/23_04
+publishedVersion: '2.33.0'
 gcpMarketplace: true
 
 image:
   registry: gcr.io
   repository: nvidia-ngc-public/tritonserver
-  tag: 23.03-py3
+  tag: 23.04-py3
   pullPolicy: IfNotPresent
   # modify the model repository here to match your GCP storage bucket
   numGpus: 1

diff --git a/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml b/deploy/gke-marketplace-app/server-deployer/data-test/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.32.0'
+  publishedVersion: '2.33.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.

diff --git a/deploy/gke-marketplace-app/server-deployer/schema.yaml b/deploy/gke-marketplace-app/server-deployer/schema.yaml
@@ -27,7 +27,7 @@
 x-google-marketplace:
   schemaVersion: v2
   applicationApiVersion: v1beta1
-  publishedVersion: '2.32.0'
+  publishedVersion: '2.33.0'
   publishedVersionMetadata:
     releaseNote: >-
       Initial release.

diff --git a/deploy/gke-marketplace-app/trt-engine/README.md b/deploy/gke-marketplace-app/trt-engine/README.md
@@ -33,7 +33,7 @@
 ```
 docker run --gpus all -it --network host \
     --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-    -v ~:/scripts nvcr.io/nvidia/tensorrt:23.02-py3 
+    -v ~:/scripts nvcr.io/nvidia/tensorrt:23.04-py3 
 
 pip install onnx six torch tf2onnx tensorflow 
 

diff --git a/deploy/k8s-onprem/values.yaml b/deploy/k8s-onprem/values.yaml
@@ -29,7 +29,7 @@ tags:
   loadBalancing: true
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:21.10-py3
+  imageName: nvcr.io/nvidia/tritonserver:23.04-py3
   pullPolicy: IfNotPresent
   modelRepositoryServer: < Replace with the IP Address of your file server >
   modelRepositoryPath: /srv/models

diff --git a/docs/customization_guide/build.md b/docs/customization_guide/build.md
@@ -173,7 +173,7 @@ $ ./build.py ... --repo-tag=common:<container tag> --repo-tag=core:<container ta
 
 If you are building on a release branch then `<container tag>` will
 default to the branch name. For example, if you are building on the
-r23.03 branch, `<container tag>` will default to r23.03. If you are
+r23.04 branch, `<container tag>` will default to r23.04. If you are
 building on any other branch (including the *main* branch) then
 `<container tag>` will default to "main". Therefore, you typically do
 not need to provide `<container tag>` at all (nor the preceding
@@ -334,8 +334,8 @@ python build.py --cmake-dir=<path/to/repo>/build --build-dir=/tmp/citritonbuild
 If you are building on *main* branch then '<container tag>' will
 default to "main". If you are building on a release branch then
 '<container tag>' will default to the branch name. For example, if you
-are building on the r23.03 branch, '<container tag>' will default to
-r23.03. Therefore, you typically do not need to provide '<container
+are building on the r23.04 branch, '<container tag>' will default to
+r23.04. Therefore, you typically do not need to provide '<container
 tag>' at all (nor the preceding colon). You can use a different
 '<container tag>' for a component to instead use the corresponding
 branch/tag in the build. For example, if you have a branch called

diff --git a/docs/customization_guide/compose.md b/docs/customization_guide/compose.md
@@ -44,8 +44,8 @@ from source to get more exact customization.
 The `compose.py` script can be found in the [server repository](https://github.com/triton-inference-server/server).
 Simply clone the repository and run `compose.py` to create a custom container.
 Note: Created container version will depend on the branch that was cloned.
-For example branch [r23.03](https://github.com/triton-inference-server/server/tree/r23.03)
-should be used to create a image based on the NGC 23.03 Triton release.
+For example branch [r23.04](https://github.com/triton-inference-server/server/tree/r23.04)
+should be used to create an image based on the NGC 23.04 Triton release.
 
 `compose.py` provides `--backend`, `--repoagent` options that allow you to
 specify which backends and repository agents to include in the custom image.
@@ -62,7 +62,7 @@ will provide a container `tritonserver` locally. You can access the container wi
 $ docker run -it tritonserver:latest
 ```
 
-Note: If `compose.py` is run on release versions `r23.03` and earlier,
+Note: If `compose.py` is run on release versions `r21.08` and earlier,
 the resulting container will have DCGM version 2.2.3 installed.
 This may result in different GPU statistic reporting behavior.
 
@@ -76,19 +76,19 @@ For example, running
 ```
 python3 compose.py --backend tensorflow1 --repoagent checksum
 ```
-on branch [r23.03](https://github.com/triton-inference-server/server/tree/r23.03) pulls:
-- `min` container `nvcr.io/nvidia/tritonserver:23.03-py3-min`
-- `full` container `nvcr.io/nvidia/tritonserver:23.03-py3`
+on branch [r23.04](https://github.com/triton-inference-server/server/tree/r23.04) pulls:
+- `min` container `nvcr.io/nvidia/tritonserver:23.04-py3-min`
+- `full` container `nvcr.io/nvidia/tritonserver:23.04-py3`
 
 Alternatively, users can specify the version of Triton container to pull from any branch by either:
 1. Adding flag `--container-version <container version>` to branch
 ```
-python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 23.03
+python3 compose.py --backend tensorflow1 --repoagent checksum --container-version 23.04
 ```
 2. Specifying `--image min,<min container image name> --image full,<full container image name>`.
    The user is responsible for specifying compatible `min` and `full` containers.
 ```
-python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:23.03-py3-min --image full,nvcr.io/nvidia/tritonserver:23.03-py3
+python3 compose.py --backend tensorflow1 --repoagent checksum --image min,nvcr.io/nvidia/tritonserver:23.04-py3-min --image full,nvcr.io/nvidia/tritonserver:23.04-py3
 ```
 Method 1 and 2 will result in the same composed container. Furthermore, `--image` flag overrides the `--container-version` flag when both are specified.
 

diff --git a/docs/customization_guide/test.md b/docs/customization_guide/test.md
@@ -49,7 +49,7 @@ $ ./gen_qa_custom_ops
 ```
 
 This will create multiple model repositories in /tmp/<version>/qa_*
-(for example /tmp/23.03/qa_model_repository).  The TensorRT models
+(for example /tmp/23.04/qa_model_repository).  The TensorRT models
 will be created for the GPU on the system that CUDA considers device 0
 (zero). If you have multiple GPUs on your system see the documentation
 in the scripts for how to target a specific GPU.

diff --git a/docs/index.md b/docs/index.md
@@ -94,4 +94,4 @@ Join the [Triton and TensorRT community](https://www.nvidia.com/en-us/deep-learn
 practices, and more. Need enterprise support? NVIDIA global support is available 
 for Triton Inference Server with the [NVIDIA AI Enterprise software suite](https://www.nvidia.com/en-us/data-center/products/ai-enterprise/).
 
-See the [Lastest Release Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/rel-23-03.html#rel-23-03) for updates on the newest features and bug fixes.
+See the [Latest Release Notes](https://docs.nvidia.com/deeplearning/triton-inference-server/release-notes/rel-23-04.html#rel-23-04) for updates on the newest features and bug fixes.
diff --git a/docs/user_guide/custom_operations.md b/docs/user_guide/custom_operations.md
@@ -64,7 +64,7 @@ simple way to ensure you are using the correct version of TensorRT is
 to use the [NGC TensorRT
 container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorrt)
 corresponding to the Triton container. For example, if you are using
-the 23.03 version of Triton, use the 23.03 version of the TensorRT
+the 23.04 version of Triton, use the 23.04 version of the TensorRT
 container.
 
 ## TensorFlow
@@ -123,7 +123,7 @@ simple way to ensure you are using the correct version of TensorFlow
 is to use the [NGC TensorFlow
 container](https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow)
 corresponding to the Triton container. For example, if you are using
-the 23.03 version of Triton, use the 23.03 version of the TensorFlow
+the 23.04 version of Triton, use the 23.04 version of the TensorFlow
 container.
 
 ## PyTorch
@@ -167,7 +167,7 @@ simple way to ensure you are using the correct version of PyTorch is
 to use the [NGC PyTorch
 container](https://ngc.nvidia.com/catalog/containers/nvidia:pytorch)
 corresponding to the Triton container. For example, if you are using
-the 23.03 version of Triton, use the 23.03 version of the PyTorch
+the 23.04 version of Triton, use the 23.04 version of the PyTorch
 container.
 
 ## ONNX

diff --git a/docs/user_guide/performance_tuning.md b/docs/user_guide/performance_tuning.md
@@ -157,10 +157,10 @@ These containers can be started interactively instead, but for the sake of demon
 
 ```bash
 # Start server container in the background
-docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:23.03-py3 
+docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-server nvcr.io/nvidia/tritonserver:23.04-py3 
 
 # Start client container in the background
-docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:23.03-py3-sdk
+docker run -d --gpus=all --network=host -v $PWD:/mnt --name triton-client nvcr.io/nvidia/tritonserver:23.04-py3-sdk
 ```
 
 > **Note**

diff --git a/qa/common/gen_jetson_trt_models b/qa/common/gen_jetson_trt_models
@@ -50,7 +50,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=23.03}
+TRITON_VERSION=${TRITON_VERSION:=23.04}
 CUDA_DEVICE=${RUNNER_ID:=0}
 
 HOST_BUILD_DIR=${HOST_BUILD_DIR:=/tmp}

diff --git a/qa/common/gen_qa_custom_ops b/qa/common/gen_qa_custom_ops
@@ -37,7 +37,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=23.03}
+TRITON_VERSION=${TRITON_VERSION:=23.04}
 NVIDIA_UPSTREAM_VERSION=${NVIDIA_UPSTREAM_VERSION:=$TRITON_VERSION}
 TENSORFLOW_IMAGE=${TENSORFLOW_IMAGE:=nvcr.io/nvidia/tensorflow:$NVIDIA_UPSTREAM_VERSION-tf2-py3}
 PYTORCH_IMAGE=${PYTORCH_IMAGE:=nvcr.io/nvidia/pytorch:$NVIDIA_UPSTREAM_VERSION-py3}

diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
@@ -48,7 +48,7 @@
 ##
 ############################################################################
 
-TRITON_VERSION=${TRITON_VERSION:=23.03}
+TRITON_VERSION=${TRITON_VERSION:=23.04}
 
 # ONNX. Use ONNX_OPSET 0 to use the default for ONNX version
 ONNX_VERSION=1.10.1