triton-inference-server · Tabrizian · Sep 26, 2023 · Sep 21, 2023 · Sep 25, 2023 · Sep 26, 2023
diff --git a/qa/L0_scalar_io/scalar_test.py b/qa/L0_scalar_io/scalar_test.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+
+sys.path.append("../common")
+
+import os
+import unittest
+
+import numpy as np
+import test_util as tu
+import tritonclient.grpc as grpcclient
+from tritonclient.utils import np_to_triton_dtype
+
+
+class ScalarIOTest(tu.TestResultCollector):
+    """Round-trip checks against the ``<backend>_scalar_<N>dim`` models.
+
+    Every request carries a single-element float32 tensor on ``INPUT`` and
+    the test asserts that ``OUTPUT`` is numerically close to what was sent.
+    """
+
+    def setUp(self):
+        # Comma-separated backend prefixes; "onnx" is used when BACKENDS is unset.
+        backend_list = os.environ.get("BACKENDS", "onnx")
+        self._backends = backend_list.split(",")
+        self._client = grpcclient.InferenceServerClient(url="localhost:8001")
+
+    def _send_request_and_verify_result(self, input, model_name):
+        """Run one inference on ``model_name`` and compare OUTPUT with ``input``.
+
+        Args:
+            input: NumPy array sent as the ``INPUT`` tensor; its shape and
+                dtype define the request tensor.
+            model_name: Name of the model to query.
+
+        Raises:
+            AssertionError: If ``OUTPUT`` is not close to ``input``.
+        """
+        request_tensor = grpcclient.InferInput(
+            "INPUT", input.shape, np_to_triton_dtype(input.dtype)
+        )
+        request_tensor.set_data_from_numpy(input)
+        response = self._client.infer(inputs=[request_tensor], model_name=model_name)
+        np.testing.assert_allclose(input, response.as_numpy("OUTPUT"))
+
+    def test_scalar_io(self):
+        """Query the 1-dim and 2-dim scalar model of every configured backend."""
+        # (model name suffix, nested-list payload holding exactly one element)
+        cases = (("1dim", [1]), ("2dim", [[1]]))
+        for backend in self._backends:
+            for suffix, payload in cases:
+                self._send_request_and_verify_result(
+                    np.asarray(payload, dtype=np.float32),
+                    f"{backend}_scalar_{suffix}",
+                )
+
+
+# Run the test cases through unittest when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
diff --git a/qa/L0_scalar_io/test.sh b/qa/L0_scalar_io/test.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Resolve the model repository version: the first command-line argument
+# overrides NVIDIA_TRITON_SERVER_VERSION, and TEST_REPO_ARCH (when set) is
+# appended as a suffix.
+REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
+if [ "$#" -ge 1 ]; then
+    REPO_VERSION=$1
+fi
+if [ -z "$REPO_VERSION" ]; then
+    echo -e "Repository version must be specified"
+    echo -e "\n***\n*** Test Failed\n***"
+    exit 1
+fi
+if [ ! -z "$TEST_REPO_ARCH" ]; then
+    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
+fi
+
+# RET accumulates the overall result and becomes the exit status.
+RET=0
+TEST_RESULT_FILE='test_results.txt'
+# NOTE(review): BACKENDS is not exported here, so scalar_test.py falls back to
+# its own "onnx" default unless the caller's environment provides it -- confirm
+# that this is intended.
+BACKENDS="onnx"
+export CUDA_VISIBLE_DEVICES=0
+DATADIR=/data/inferenceserver/${REPO_VERSION}
+
+# Start from a fresh copy of the pre-generated scalar models.
+rm -rf models
+mkdir models
+cp -r $DATADIR/qa_scalar_models/* models/
+
+CLIENT_LOG="./client.log"
+SCALAR_TEST=scalar_test.py
+# Presumably provides run_server and SERVER_PID used below -- defined outside
+# this script.
+source ../common/util.sh
+
+SERVER=/opt/tritonserver/bin/tritonserver
+SERVER_ARGS="--model-repository=`pwd`/models"
+SERVER_LOG="./inference_server.log"
+
+# Positive case: the server must start with the unmodified models.
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    cat $SERVER_LOG
+    exit 1
+fi
+
+python3 $SCALAR_TEST >> $CLIENT_LOG 2>&1
+if [ $? -ne 0 ]; then
+    echo -e "\n***\n*** scalar_test.py FAILED. \n***"
+    cat $CLIENT_LOG
+    cat $SERVER_LOG
+    RET=1
+fi
+
+kill $SERVER_PID
+wait $SERVER_PID
+
+# Make sure the server fails loading the model if it has a dimension higher than
+# 1
+sed -i "s/dims.*/dims:\[2\]/g" models/onnx_scalar_1dim/config.pbtxt
+run_server
+if [ "$SERVER_PID" != "0" ]; then
+    echo -e "\n***\n*** Expected the server to fail loading \n***"
+    cat $SERVER_LOG
+    # The server came up even though it should not have: stop it so no
+    # process is left behind, then fail through the common exit path below.
+    kill $SERVER_PID
+    wait $SERVER_PID
+    RET=1
+fi
+
+if [ $RET -eq 0 ]; then
+    echo -e "\n***\n*** Test Passed\n***"
+else
+    echo -e "\n***\n*** Test FAILED\n***"
+fi
+
+exit $RET
diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
@@ -91,6 +91,7 @@ HOST_VARIMPLICITSEQDESTDIR=$HOST_BUILD_DIR/$TRITON_VERSION/qa_variable_sequence_
 HOST_INITIALSTATEIMPLICITSEQDESTDIR=$HOST_BUILD_DIR/$TRITON_VERSION/qa_sequence_initial_state_implicit_model_repository
 HOST_VARINITIALSTATEIMPLICITSEQDESTDIR=$HOST_BUILD_DIR/$TRITON_VERSION/qa_variable_sequence_initial_state_implicit_model_repository
 HOST_TORCHTRTDESTDIR=$HOST_BUILD_DIR/$TRITON_VERSION/torchtrt_model_store
+HOST_SCALARMODELSDESTDIR=$HOST_BUILD_DIR/$TRITON_VERSION/qa_scalar_models
 
 rm -fr $HOST_SRCDIR $HOST_DESTDIR $HOST_VARDESTDIR $HOST_TFPARAMETERSDESTDIR
 rm -fr $HOST_IDENTITYDESTDIR $HOST_IDENTITYBIGDESTDIR $HOST_SHAPEDESTDIR $HOST_SIGDEFDESTDIR
@@ -99,6 +100,7 @@ rm -fr $HOST_ENSEMBLEDESTDIR $HOST_NOSHAPEDESTDIR $HOST_RESHAPEDESTDIR
 rm -fr $HOST_PLGDESTDIR $HOST_RAGGEDDESTDIR $HOST_FORMATDESTDIR $HOST_DATADEPENDENTDIR
 rm -rf $HOST_IMPLICITSEQDESTDIR $HOST_VARIMPLICITSEQDESTDIR $HOST_DYNASEQIMPLICITDESTDIR
 rm -rf $HOST_VARINITIALSTATEIMPLICITSEQDESTDIR $HOST_INITIALSTATEIMPLICITSEQDESTDIR
+rm -rf $HOST_SCALARMODELSDESTDIR
 mkdir -p $HOST_SRCDIR
 mkdir -p $HOST_DESTDIR
 mkdir -p $HOST_VARDESTDIR
@@ -123,6 +125,7 @@ mkdir -p $HOST_VARIMPLICITSEQDESTDIR
 mkdir -p $HOST_INITIALSTATEIMPLICITSEQDESTDIR
 mkdir -p $HOST_VARINITIALSTATEIMPLICITSEQDESTDIR
 mkdir -p $HOST_TORCHTRTDESTDIR
+mkdir -p $HOST_SCALARMODELSDESTDIR
 
 # Since the models required by ensemble models may not be available
 # at this point, storing ensemble models separately so that other qa directories
@@ -155,6 +158,7 @@ cp ./test_util.py $HOST_SRCDIR/.
 cp ./gen_tag_sigdef.py $HOST_SRCDIR/.
 cp ./gen_qa_tf_parameters.py $HOST_SRCDIR/.
 cp ./gen_common.py $HOST_SRCDIR/.
+cp ./gen_qa_ort_scalar_models.py $HOST_SRCDIR/.
 
 ONNXSCRIPT=onnx_gen.cmds
 OPENVINOSCRIPT=openvino_gen.cmds
@@ -186,6 +190,7 @@ RAGGEDDESTDIR=/tmp/raggedmodels
 FORMATDESTDIR=/tmp/formatmodels
 DATADEPENDENTDIR=/tmp/datadependentmodels
 TORCHTRTDESTDIR=/tmp/torchtrtmodels
+SCALARMODELSDESTDIR=/tmp/scalarmodels
 
 # OPENVINO
 #
@@ -243,6 +248,7 @@ docker run $DOCKER_GPU_ARGS --rm --entrypoint $SRCDIR/$OPENVINOSCRIPT \
        --mount type=bind,source=$HOST_DYNASEQDESTDIR,target=$DYNASEQDESTDIR \
        --mount type=bind,source=$HOST_VARSEQDESTDIR,target=$VARSEQDESTDIR \
        --mount type=bind,source=$HOST_RAGGEDDESTDIR,target=$RAGGEDDESTDIR \
+       --mount type=bind,source=$HOST_SCALARMODELSDESTDIR,target=$SCALARMODELSDESTDIR \
        $UBUNTU_IMAGE
 if [ $? -ne 0 ]; then
     echo -e "Failed"
@@ -293,6 +299,8 @@ python3 $SRCDIR/gen_qa_dyna_sequence_implicit_models.py --onnx --onnx_opset=$ONN
 chmod -R 777 $DYNASEQIMPLICITDESTDIR
 python3 $SRCDIR/gen_qa_ragged_models.py --onnx --onnx_opset=$ONNX_OPSET --models_dir=$RAGGEDDESTDIR
 chmod -R 777 $RAGGEDDESTDIR
+python3 $SRCDIR/gen_qa_ort_scalar_models.py --onnx_opset=$ONNX_OPSET --models_dir=$SCALARMODELSDESTDIR
+chmod -R 777 $SCALARMODELSDESTDIR
 EOF
 
 chmod a+x $HOST_SRCDIR/$ONNXSCRIPT
@@ -317,6 +325,7 @@ docker run $DOCKER_GPU_ARGS --rm --entrypoint $SRCDIR/$ONNXSCRIPT \
        --mount type=bind,source=$HOST_IMPLICITSEQDESTDIR,target=$IMPLICITSEQDESTDIR \
        --mount type=bind,source=$HOST_VARINITIALSTATEIMPLICITSEQDESTDIR,target=$VARINITIALSTATEIMPLICITSEQDESTDIR \
        --mount type=bind,source=$HOST_INITIALSTATEIMPLICITSEQDESTDIR,target=$INITIALSTATEIMPLICITSEQDESTDIR \
+       --mount type=bind,source=$HOST_SCALARMODELSDESTDIR,target=$SCALARMODELSDESTDIR \
        $UBUNTU_IMAGE
 if [ $? -ne 0 ]; then
     echo -e "Failed"

diff --git a/qa/common/gen_qa_ort_scalar_models.py b/qa/common/gen_qa_ort_scalar_models.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+
+# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+
+sys.path.append("../common")
+import argparse
+import os
+
+import numpy as np
+import onnx
+import test_util as tu
+from gen_common import np_to_model_dtype, np_to_onnx_dtype
+
+
+def create_onnx_modelfile(models_dir, shape, dtype, model_version=1):
+    """Write a single-node ONNX identity model named ``onnx_scalar_<rank>dim``.
+
+    The graph consists of one ``Identity`` node mapping ``INPUT`` to
+    ``OUTPUT``. Both tensors are declared with shape ``None``, so ``shape``
+    only contributes its length, which becomes the rank in the model name.
+
+    Reads the module-level ``FLAGS.onnx_opset``: a value greater than 0 is
+    passed to ``make_model`` as the opset import, otherwise no opset is passed.
+
+    Args:
+        models_dir: Top-level directory under which the model is created.
+        shape: Tensor shape; only ``len(shape)`` is used.
+        dtype: NumPy dtype used for both INPUT and OUTPUT.
+        model_version: Name of the version sub-directory (default 1).
+
+    Raises:
+        OSError: If the version directory cannot be created.
+    """
+    onnx_io_dtype = np_to_onnx_dtype(dtype)
+
+    model_name = f"onnx_scalar_{len(shape)}dim"
+    model_version_dir = os.path.join(models_dir, model_name, str(model_version))
+
+    input_info = onnx.helper.make_tensor_value_info("INPUT", onnx_io_dtype, None)
+    output_info = onnx.helper.make_tensor_value_info("OUTPUT", onnx_io_dtype, None)
+    identity = onnx.helper.make_node("Identity", ["INPUT"], ["OUTPUT"])
+
+    graph_proto = onnx.helper.make_graph(
+        [identity], model_name, [input_info], [output_info]
+    )
+    if FLAGS.onnx_opset > 0:
+        model_opset = onnx.helper.make_operatorsetid("", FLAGS.onnx_opset)
+        model_def = onnx.helper.make_model(
+            graph_proto, producer_name="triton", opset_imports=[model_opset]
+        )
+    else:
+        model_def = onnx.helper.make_model(graph_proto, producer_name="triton")
+
+    # exist_ok tolerates a directory left by an earlier run while still
+    # surfacing genuine failures (e.g. permissions) instead of hiding them.
+    os.makedirs(model_version_dir, exist_ok=True)
+
+    onnx.save(model_def, os.path.join(model_version_dir, "model.onnx"))
+
+
+def create_onnx_modelconfig(models_dir, dtype, shape):
+    """Write ``config.pbtxt`` for the ``onnx_scalar_<rank>dim`` model.
+
+    The config declares one input ``INPUT`` and one output ``OUTPUT`` that
+    share the same data type and dims.
+
+    Args:
+        models_dir: Top-level directory under which the model directory lives.
+        dtype: NumPy dtype, converted with ``np_to_model_dtype``.
+        shape: Tensor shape, rendered with ``tu.shape_to_dims_str``; its
+            length also selects the model name.
+
+    Raises:
+        OSError: If the model directory or config file cannot be written.
+    """
+    model_name = f"onnx_scalar_{len(shape)}dim"
+    config_dir = os.path.join(models_dir, model_name)
+
+    # INPUT and OUTPUT use identical settings, so render each value once.
+    model_dtype = np_to_model_dtype(dtype)
+    dims_str = tu.shape_to_dims_str(shape)
+
+    config = """
+input [
+  {{
+    name: "INPUT"
+    data_type: {}
+    dims: [ {} ]
+  }}
+]
+output [
+  {{
+    name: "OUTPUT"
+    data_type: {}
+    dims: [ {} ]
+  }}
+]
+""".format(
+        model_dtype,
+        dims_str,
+        model_dtype,
+        dims_str,
+    )
+
+    # exist_ok tolerates a directory that already exists (for instance one
+    # created for the model file) without hiding other OSError failures.
+    os.makedirs(config_dir, exist_ok=True)
+
+    with open(os.path.join(config_dir, "config.pbtxt"), "w") as cfile:
+        cfile.write(config)
+
+
+if __name__ == "__main__":
+    # Command-line interface: output directory plus an optional ONNX opset.
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--models_dir",
+        type=str,
+        required=True,
+        help="Top-level model directory",
+    )
+    parser.add_argument(
+        "--onnx_opset",
+        type=int,
+        required=False,
+        default=0,
+        help="Opset used for Onnx models. Default is to use ONNXRT default",
+    )
+
+    # FLAGS stays a module-level name because create_onnx_modelfile reads it.
+    FLAGS = parser.parse_args()
+
+    # Emit the model file and then its config for each of the two shapes.
+    for scalar_shape in ([1], [1, 1]):
+        create_onnx_modelfile(FLAGS.models_dir, shape=scalar_shape, dtype=np.float32)
+        create_onnx_modelconfig(FLAGS.models_dir, shape=scalar_shape, dtype=np.float32)