From 33262a4cb8bc75a9f790a16c5247fb2dd66d71e9 Mon Sep 17 00:00:00 2001
From: GuanLuo <gluo@nvidia.com>
Date: Mon, 24 Apr 2023 16:37:54 -0700
Subject: [PATCH 1/2] Modify timeout test in L0_sequence_batcher to use
 portable backend

---
 .../config.pbtxt                              | 39 ++++++++++++++++---
 .../sequence_batcher_test.py                  | 31 +++++++++------
 qa/L0_sequence_batcher/test.sh                |  3 +-
 3 files changed, 54 insertions(+), 19 deletions(-)
 rename qa/L0_sequence_batcher/request_timeout_models/{identity_fp32_timeout => custom_sequence_int32_timeout}/config.pbtxt (75%)

diff --git a/qa/L0_sequence_batcher/request_timeout_models/identity_fp32_timeout/config.pbtxt b/qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt
similarity index 75%
rename from qa/L0_sequence_batcher/request_timeout_models/identity_fp32_timeout/config.pbtxt
rename to qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt
index f1dc316de1..84b6d62411 100644
--- a/qa/L0_sequence_batcher/request_timeout_models/identity_fp32_timeout/config.pbtxt
+++ b/qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt
@@ -24,22 +24,22 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-name: "identity_fp32_timeout"
-backend: "python"
+backend: "sequence"
+default_model_filename: "libtriton_sequence.so"
 max_batch_size: 1
 
 input [
   {
-    name: "INPUT0"
-    data_type: TYPE_FP32
+    name: "INPUT"
+    data_type: TYPE_INT32
     dims: [ 1 ]
   }
 ]
 
 output [
   {
-    name: "OUTPUT0"
-    data_type: TYPE_FP32
+    name: "OUTPUT"
+    data_type: TYPE_INT32
     dims: [ 1 ]
   }
 ]
@@ -53,4 +53,31 @@ instance_group [
 
 sequence_batching {
   max_sequence_idle_microseconds: 50000000
+  control_input [
+    {
+      name: "START"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_START
+          int32_false_true: [ 0, 1 ]
+        }
+      ]
+    },
+    {
+      name: "READY"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_READY
+          int32_false_true: [ 0, 1 ]
+        }
+      ]
+    }
+  ]
 }
+
+parameters [
+  {
+    key: "execute_delay_ms"
+    value: { string_value: "3000" }
+  }
+]
\ No newline at end of file
diff --git a/qa/L0_sequence_batcher/sequence_batcher_test.py b/qa/L0_sequence_batcher/sequence_batcher_test.py
index 0c30b23c5b..8d71f11c87 100644
--- a/qa/L0_sequence_batcher/sequence_batcher_test.py
+++ b/qa/L0_sequence_batcher/sequence_batcher_test.py
@@ -2860,15 +2860,21 @@ def setUp(self):
         self.server_address_ = os.environ.get('TRITONSERVER_IPADDR',
                                               'localhost') + ":8001"
 
-        self.model_name_ = "identity_fp32_timeout"
-        self.tensor_data_ = np.ones(shape=[1, 1], dtype=np.float32)
-        self.inputs_ = [grpcclient.InferInput('INPUT0', [1, 1], "FP32")]
+        # Prepare input and expected output based on the model and
+        # the infer sequence sent for testing. If the test is to be extended
+        # for different sequence and model, then proper grouping should be added
+        self.model_name_ = "custom_sequence_int32_timeout"
+        self.tensor_data_ = np.ones(shape=[1, 1], dtype=np.int32)
+        self.inputs_ = [grpcclient.InferInput('INPUT', [1, 1], "INT32")]
         self.inputs_[0].set_data_from_numpy(self.tensor_data_)
+        self.expected_out_seq_ = [("OUTPUT", self.tensor_data_),
+                                  ("OUTPUT", self.tensor_data_ * 2),
+                                  ("OUTPUT", self.tensor_data_ * 3)]
 
     def send_sequence_with_timeout(self,
                                    seq_id,
                                    callback,
-                                   timeout_us=3000000,
+                                   timeout_us=2000000,
                                    request_pause_sec=0):
         with grpcclient.InferenceServerClient(
                 self.server_address_) as triton_client:
@@ -2897,8 +2903,8 @@ def test_request_timeout(self):
         # expect the timeout will only be expired on backlog sequence and reject
         # all requests of the sequence once expired.
         # Sending two sequences while the model can only process one sequence
-        # at a time. Each model execution takes 5 second and all requests have
-        # 3 second timeout, so the second sequence will be rejected.
+        # at a time. Each model execution takes 3 seconds and all requests have
+        # a 2 second timeout, so the second sequence will be rejected.
 
         # correlation ID is 1-index
         seq1_res = []
@@ -2920,16 +2926,19 @@ def test_request_timeout(self):
         for t in threads:
             t.join()
 
-        for result, error in seq1_res:
+        for idx in range(len(seq1_res)):
+            result, error = seq1_res[idx]
             self.assertIsNone(
                 error,
                 "Expect sucessful inference for sequence 1 requests, got error: {}"
                 .format(error))
+            out = result.as_numpy(self.expected_out_seq_[idx][0])
+            expected_out = self.expected_out_seq_[idx][1]
             np.testing.assert_allclose(
-                result.as_numpy("OUTPUT0"),
-                self.tensor_data_,
-                err_msg="Unexpected output tensor, got {}".format(
-                    result.as_numpy("OUTPUT0")))
+                out,
+                expected_out,
+                err_msg="Unexpected output tensor: expect {}, got {}".format(
+                    expected_out, out))
 
         for _, error in seq2_res:
             self.assertIsNotNone(error, "Expect error for sequence 2 requests")
diff --git a/qa/L0_sequence_batcher/test.sh b/qa/L0_sequence_batcher/test.sh
index 4a969dba0c..22d6da91bc 100755
--- a/qa/L0_sequence_batcher/test.sh
+++ b/qa/L0_sequence_batcher/test.sh
@@ -735,8 +735,7 @@ if [ "$TEST_SYSTEM_SHARED_MEMORY" -ne 1 ] && [ "$TEST_CUDA_SHARED_MEMORY" -ne 1
 
     TEST_CASE=SequenceBatcherRequestTimeoutTest
     MODEL_PATH=request_timeout_models
-    mkdir -p ${MODEL_PATH}/identity_fp32_timeout/1
-    cp ../python_models/identity_fp32_timeout/model.py ${MODEL_PATH}/identity_fp32_timeout/1/.
+    cp -r ../custom_models/custom_sequence_int32/1 ${MODEL_PATH}/custom_sequence_int32_timeout
 
     SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}"
     SERVER_LOG="./$TEST_CASE.$MODEL_PATH.server.log"

From 6efb0fd977e0ac4c7d2df9e74ea7775078fc97ef Mon Sep 17 00:00:00 2001
From: GuanLuo <gluo@nvidia.com>
Date: Tue, 25 Apr 2023 16:44:33 -0700
Subject: [PATCH 2/2] Use identity backend that is built by default on Windows

---
 .../config.pbtxt                              | 27 +++----------------
 .../sequence_batcher_test.py                  |  8 +++---
 qa/L0_sequence_batcher/test.sh                |  2 +-
 3 files changed, 8 insertions(+), 29 deletions(-)

diff --git a/qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt b/qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt
index 84b6d62411..1573534432 100644
--- a/qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt
+++ b/qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt
@@ -24,13 +24,12 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-backend: "sequence"
-default_model_filename: "libtriton_sequence.so"
+backend: "identity"
 max_batch_size: 1
 
 input [
   {
-    name: "INPUT"
+    name: "INPUT0"
     data_type: TYPE_INT32
     dims: [ 1 ]
   }
@@ -38,7 +37,7 @@ input [
 
 output [
   {
-    name: "OUTPUT"
+    name: "OUTPUT0"
     data_type: TYPE_INT32
     dims: [ 1 ]
   }
@@ -53,26 +52,6 @@ instance_group [
 
 sequence_batching {
   max_sequence_idle_microseconds: 50000000
-  control_input [
-    {
-      name: "START"
-      control [
-        {
-          kind: CONTROL_SEQUENCE_START
-          int32_false_true: [ 0, 1 ]
-        }
-      ]
-    },
-    {
-      name: "READY"
-      control [
-        {
-          kind: CONTROL_SEQUENCE_READY
-          int32_false_true: [ 0, 1 ]
-        }
-      ]
-    }
-  ]
 }
 
 parameters [
diff --git a/qa/L0_sequence_batcher/sequence_batcher_test.py b/qa/L0_sequence_batcher/sequence_batcher_test.py
index 8d71f11c87..8d7b4c1fd4 100644
--- a/qa/L0_sequence_batcher/sequence_batcher_test.py
+++ b/qa/L0_sequence_batcher/sequence_batcher_test.py
@@ -2865,11 +2865,11 @@ def setUp(self):
         # for different sequence and model, then proper grouping should be added
         self.model_name_ = "custom_sequence_int32_timeout"
         self.tensor_data_ = np.ones(shape=[1, 1], dtype=np.int32)
-        self.inputs_ = [grpcclient.InferInput('INPUT', [1, 1], "INT32")]
+        self.inputs_ = [grpcclient.InferInput('INPUT0', [1, 1], "INT32")]
         self.inputs_[0].set_data_from_numpy(self.tensor_data_)
-        self.expected_out_seq_ = [("OUTPUT", self.tensor_data_),
-                                  ("OUTPUT", self.tensor_data_ * 2),
-                                  ("OUTPUT", self.tensor_data_ * 3)]
+        self.expected_out_seq_ = [("OUTPUT0", self.tensor_data_),
+                                  ("OUTPUT0", self.tensor_data_),
+                                  ("OUTPUT0", self.tensor_data_)]
 
     def send_sequence_with_timeout(self,
                                    seq_id,
diff --git a/qa/L0_sequence_batcher/test.sh b/qa/L0_sequence_batcher/test.sh
index 22d6da91bc..7d90641b0d 100755
--- a/qa/L0_sequence_batcher/test.sh
+++ b/qa/L0_sequence_batcher/test.sh
@@ -735,7 +735,7 @@ if [ "$TEST_SYSTEM_SHARED_MEMORY" -ne 1 ] && [ "$TEST_CUDA_SHARED_MEMORY" -ne 1
 
     TEST_CASE=SequenceBatcherRequestTimeoutTest
     MODEL_PATH=request_timeout_models
-    cp -r ../custom_models/custom_sequence_int32/1 ${MODEL_PATH}/custom_sequence_int32_timeout
+    mkdir -p ${MODEL_PATH}/custom_sequence_int32_timeout/1
 
     SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}"
     SERVER_LOG="./$TEST_CASE.$MODEL_PATH.server.log"