From b9b2c93bf206e606db38a3ea798780b017bf4117 Mon Sep 17 00:00:00 2001
From: kthui <18255193+kthui@users.noreply.github.com>
Date: Sun, 4 Jun 2023 17:32:55 -0700
Subject: [PATCH] Add test with explicit resource

---
Reviewer notes (this area, up to the first "diff --git", is not part of the
commit message and is skipped when the patch is applied):
 * instance_update_test.py: test_instance_resource_update is renamed to
   test_instance_resource_increase and its decrease steps are removed; a
   separate test_instance_resource_decrease (R1 count 4 -> 2) is added. The new
   test_instance_resource_increase_above_explicit expects
   InferenceServerException when the update requests R1 count 32.
 * test.sh: adds the "execution_count_with_explicit_resource" mode, which
   starts the server with
   --rate-limit=execution_count --rate-limit-resource=R1:10.
 * NOTE(review): line breaks were restored to agree with the hunk headers and
   the diffstat; indentation inside the hunks was rebuilt by convention.
   Confirm whitespace against the target files, or apply with
   `git apply --ignore-whitespace`.

 qa/L0_model_update/instance_update_test.py | 48 ++++++++++++++++------
 qa/L0_model_update/test.sh                 | 11 +++--
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/qa/L0_model_update/instance_update_test.py b/qa/L0_model_update/instance_update_test.py
index 421292310f..7f1fa1cd0a 100644
--- a/qa/L0_model_update/instance_update_test.py
+++ b/qa/L0_model_update/instance_update_test.py
@@ -338,10 +338,10 @@ def test_infer_while_updating(self):
         # Unload model
         self.__unload_model()
 
-    # Test instance resource requirement update
-    @unittest.skipUnless(os.environ["RATE_LIMIT_MODE"] == "execution_count",
+    # Test instance resource requirement increase
+    @unittest.skipUnless("execution_count" in os.environ["RATE_LIMIT_MODE"],
                          "Rate limiter precondition not met for this test")
-    def test_instance_resource_update(self):
+    def test_instance_resource_increase(self):
         # Load model
         self.__load_model(
             1,
@@ -365,19 +365,41 @@ def infer():
         time.sleep(infer_count / 2)  # each infer should take < 0.5 seconds
         self.assertNotIn(False, infer_complete, "Infer possibly stuck")
         infer_thread.result()
-        # Decrease the resource requirement
-        self.__update_instance_count(
-            1, 1,
-            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 6\n}\n]\n}\n}"
+        # Unload model
+        self.__unload_model()
+
+    # Test instance resource requirement increase above explicit resource
+    @unittest.skipUnless(os.environ["RATE_LIMIT_MODE"] ==
+                         "execution_count_with_explicit_resource",
+                         "Rate limiter precondition not met for this test")
+    def test_instance_resource_increase_above_explicit(self):
+        # Load model
+        self.__load_model(
+            1,
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 2\n}\n]\n}\n}"
         )
-        # Further decrease the resource requirement. The previous decrease
-        # should have lower the max resource in the rate limiter, which the
-        # error "Should not print this ..." should not be printed into the
-        # server log because the max resource is above the previously set limit
-        # and it will be checked by the main bash test script.
+        # Increase resource requirement
+        with self.assertRaises(InferenceServerException):
+            self.__update_instance_count(
+                1, 1,
+                "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 32\n}\n]\n}\n}"
+            )
+        # Unload model
+        self.__triton.unload_model(self.__model_name)
+
+    # Test instance resource requirement decrease
+    @unittest.skipUnless("execution_count" in os.environ["RATE_LIMIT_MODE"],
+                         "Rate limiter precondition not met for this test")
+    def test_instance_resource_decrease(self):
+        # Load model
+        self.__load_model(
+            1,
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 4\n}\n]\n}\n}"
+        )
+        # Decrease resource requirement
         self.__update_instance_count(
             1, 1,
-            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 4\n}\n]\n}\n}"
+            "{\ncount: 1\nkind: KIND_CPU\nrate_limiter {\nresources [\n{\nname: \"R1\"\ncount: 2\n}\n]\n}\n}"
         )
         # Unload model
         self.__unload_model()
diff --git a/qa/L0_model_update/test.sh b/qa/L0_model_update/test.sh
index 5a4eb09d69..7f8c23e38a 100755
--- a/qa/L0_model_update/test.sh
+++ b/qa/L0_model_update/test.sh
@@ -55,15 +55,20 @@ function setup_models() {
 
 RET=0
 
-# Test model instance update with and without rate limiting enabled
-for RATE_LIMIT_MODE in "off" "execution_count"; do
+# Test model instance update with rate limiting on/off and explicit resource
+for RATE_LIMIT_MODE in "off" "execution_count" "execution_count_with_explicit_resource"; do
+
+    RATE_LIMIT_ARGS="--rate-limit=$RATE_LIMIT_MODE"
+    if [ "$RATE_LIMIT_MODE" == "execution_count_with_explicit_resource" ]; then
+        RATE_LIMIT_ARGS="--rate-limit=execution_count --rate-limit-resource=R1:10"
+    fi
 
     export RATE_LIMIT_MODE=$RATE_LIMIT_MODE
     TEST_LOG="instance_update_test.rate_limit_$RATE_LIMIT_MODE.log"
     SERVER_LOG="./instance_update_test.rate_limit_$RATE_LIMIT_MODE.server.log"
 
     setup_models
-    SERVER_ARGS="--model-repository=models --model-control-mode=explicit --rate-limit=$RATE_LIMIT_MODE --log-verbose=2"
+    SERVER_ARGS="--model-repository=models --model-control-mode=explicit $RATE_LIMIT_ARGS --log-verbose=2"
     run_server
     if [ "$SERVER_PID" == "0" ]; then
         echo -e "\n***\n*** Failed to start $SERVER\n***"