From b9b2c93bf206e606db38a3ea798780b017bf4117 Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Sun, 4 Jun 2023 17:32:55 -0700
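Subject: [PATCH] Add instance update test with explicit rate limit resource

Split test_instance_resource_update into separate increase and decrease
tests, and add test_instance_resource_increase_above_explicit, which
expects an InferenceServerException when the requested R1 count (32)
exceeds the explicitly configured resource. test.sh gains an
"execution_count_with_explicit_resource" mode that starts the server with
--rate-limit=execution_count --rate-limit-resource=R1:10.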

---
 qa/L0_model_update/instance_update_test.py | 48 ++++++++++++++++------
 qa/L0_model_update/test.sh                 | 11 +++--
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/qa/L0_model_update/instance_update_test.py b/qa/L0_model_update/instance_update_test.py
index 421292310f..7f1fa1cd0a 100644
--- a/qa/L0_model_update/instance_update_test.py
+++ b/qa/L0_model_update/instance_update_test.py
@@ -338,10 +338,10 @@ def test_infer_while_updating(self):
         # Unload model
         self.__unload_model()
 
-    # Test instance resource requirement update
-    @unittest.skipUnless(os.environ["RATE_LIMIT_MODE"] == "execution_count",
+    # Test instance resource requirement increase
+    @unittest.skipUnless("execution_count" in os.environ["RATE_LIMIT_MODE"],
                          "Rate limiter precondition not met for this test")
-    def test_instance_resource_update(self):
+    def test_instance_resource_increase(self):
         # Load model
         self.__load_model(
             1,
@@ -365,19 +365,41 @@ def infer():
             time.sleep(infer_count / 2)  # each infer should take < 0.5 seconds
             self.assertNotIn(False, infer_complete, "Infer possibly stuck")
             infer_thread.result()
-        # Decrease the resource requirement
-        self.__update_instance_count(
-            1, 1,
-            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 6\n}\n]\n}\n}"
+        # Unload model
+        self.__unload_model()
+
+    # Test instance resource requirement increase above explicit resource
+    @unittest.skipUnless(os.environ["RATE_LIMIT_MODE"] ==
+                         "execution_count_with_explicit_resource",
+                         "Rate limiter precondition not met for this test")
+    def test_instance_resource_increase_above_explicit(self):
+        # Load model
+        self.__load_model(
+            1,
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 2\n}\n]\n}\n}"
         )
-        # Further decrease the resource requirement. The previous decrease
-        # should have lower the max resource in the rate limiter, which the
-        # error "Should not print this ..." should not be printed into the
-        # server log because the max resource is above the previously set limit
-        # and it will be checked by the main bash test script.
+        # Increase resource requirement
+        with self.assertRaises(InferenceServerException):
+            self.__update_instance_count(
+                1, 1,
+                "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 32\n}\n]\n}\n}"
+            )
+        # Unload directly via self.__triton (TODO confirm vs __unload_model)
+        self.__triton.unload_model(self.__model_name)
+
+    # Test instance resource requirement decrease
+    @unittest.skipUnless("execution_count" in os.environ["RATE_LIMIT_MODE"],
+                         "Rate limiter precondition not met for this test")
+    def test_instance_resource_decrease(self):
+        # Load model
+        self.__load_model(
+            1,
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 4\n}\n]\n}\n}"
+        )
+        # Decrease resource requirement
         self.__update_instance_count(
             1, 1,
-            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 4\n}\n]\n}\n}"
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 2\n}\n]\n}\n}"
         )
         # Unload model
         self.__unload_model()
diff --git a/qa/L0_model_update/test.sh b/qa/L0_model_update/test.sh
index 5a4eb09d69..7f8c23e38a 100755
--- a/qa/L0_model_update/test.sh
+++ b/qa/L0_model_update/test.sh
@@ -55,15 +55,20 @@ function setup_models() {
 
 RET=0
 
-# Test model instance update with and without rate limiting enabled
-for RATE_LIMIT_MODE in "off" "execution_count"; do
+# Test model instance update with rate limiting on/off and explicit resource
+for RATE_LIMIT_MODE in "off" "execution_count" "execution_count_with_explicit_resource"; do
+
+    RATE_LIMIT_ARGS="--rate-limit=$RATE_LIMIT_MODE"
+    if [ "$RATE_LIMIT_MODE" == "execution_count_with_explicit_resource" ]; then
+        RATE_LIMIT_ARGS="--rate-limit=execution_count --rate-limit-resource=R1:10"
+    fi
 
     export RATE_LIMIT_MODE=$RATE_LIMIT_MODE
     TEST_LOG="instance_update_test.rate_limit_$RATE_LIMIT_MODE.log"
     SERVER_LOG="./instance_update_test.rate_limit_$RATE_LIMIT_MODE.server.log"
 
     setup_models
-    SERVER_ARGS="--model-repository=models --model-control-mode=explicit --rate-limit=$RATE_LIMIT_MODE --log-verbose=2"
+    SERVER_ARGS="--model-repository=models --model-control-mode=explicit $RATE_LIMIT_ARGS --log-verbose=2"
     run_server
     if [ "$SERVER_PID" == "0" ]; then
         echo -e "\n***\n*** Failed to start $SERVER\n***"