From 45ce9d807bde225a277b0119f15d24c2dda5fa72 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Mon, 3 Jul 2023 16:15:39 -0700 Subject: [PATCH 1/5] Update and refactor test for adding name into instance signature --- qa/L0_model_update/instance_update_test.py | 61 +++++++++------------- 1 file changed, 24 insertions(+), 37 deletions(-) diff --git a/qa/L0_model_update/instance_update_test.py b/qa/L0_model_update/instance_update_test.py index 39f5bfc8d4..88dba6cd4d 100644 --- a/qa/L0_model_update/instance_update_test.py +++ b/qa/L0_model_update/instance_update_test.py @@ -186,27 +186,6 @@ def test_gpu_instance_update(self): self.__update_instance_count(3, 0, "{\ncount: 7\nkind: KIND_GPU\n}") self.__unload_model() - # Test add/remove multiple CPU/GPU instances at a time - def test_gpu_cpu_instance_update(self): - # Load model with 1 GPU instance and 2 CPU instance - self.__load_model( - 3, - "{\ncount: 2\nkind: KIND_CPU\n},\n{\ncount: 1\nkind: KIND_GPU\n}") - # Add 2 GPU instance and remove 1 CPU instance - self.__update_instance_count( - 2, 1, - "{\ncount: 1\nkind: KIND_CPU\n},\n{\ncount: 3\nkind: KIND_GPU\n}") - # Shuffle the instances - self.__update_instance_count( - 0, 0, - "{\ncount: 3\nkind: KIND_GPU\n},\n{\ncount: 1\nkind: KIND_CPU\n}") - # Remove 1 GPU instance and add 1 CPU instance - self.__update_instance_count( - 1, 1, - "{\ncount: 2\nkind: KIND_GPU\n},\n{\ncount: 2\nkind: KIND_CPU\n}") - # Unload model - self.__unload_model() - # Test model instance name update def test_instance_name_update(self): # Load 3 instances with 2 different names @@ -214,37 +193,45 @@ def test_instance_name_update(self): 3, "{\nname: \"old_1\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"old_2\"\ncount: 2\nkind: KIND_GPU\n}" ) - # Change the instance names + # Rename all instances self.__update_instance_count( - 0, 0, + 3, 3, "{\nname: \"new_1\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"new_2\"\ncount: 2\nkind: KIND_GPU\n}" ) + # Shuffle the 
instances + self.__update_instance_count( + 0, 0, + "{\nname: \"new_2\"\ncount: 2\nkind: KIND_GPU\n},\n{\nname: \"new_1\"\ncount: 1\nkind: KIND_CPU\n}" + ) + time.sleep(0.1) # larger the gap for config.pbtxt timestamp to update # Unload model self.__unload_model() - # Test instance signature grouping - def test_instance_signature(self): - # Load 2 GPU instances and 3 CPU instances + # Test multiple model instances with the same name + def test_instance_same_name(self): + # Load 2 instances with the same name self.__load_model( - 5, - "{\nname: \"GPU_group\"\ncount: 2\nkind: KIND_GPU\n},\n{\nname: \"CPU_group\"\ncount: 3\nkind: KIND_CPU\n}" + 2, + "{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n}" ) - # Flatten the instances representation + # Remove 1 instance with the same name self.__update_instance_count( - 0, 0, - "{\nname: \"CPU_1\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"CPU_2_3\"\ncount: 2\nkind: KIND_CPU\n},\n{\nname: \"GPU_1\"\ncount: 1\nkind: KIND_GPU\n},\n{\nname: \"GPU_2\"\ncount: 1\nkind: KIND_GPU\n}" + 0, 1, "{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n}") + # Add 2 instances with the same name + self.__update_instance_count( + 2, 0, + "{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n}" ) - time.sleep(0.1) # larger the gap for config.pbtxt timestamp to update - # Consolidate different representations + # No change self.__update_instance_count( 0, 0, - "{\nname: \"CPU_group\"\ncount: 3\nkind: KIND_CPU\n},\n{\nname: \"GPU_group\"\ncount: 2\nkind: KIND_GPU\n}" + "{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n}" ) time.sleep(0.1) # larger the gap for config.pbtxt timestamp to update - # Flatten the instances representation + # Change instance kind but keeping the same name self.__update_instance_count( - 0, 0, - "{\nname: \"GPU_1\"\ncount: 
1\nkind: KIND_GPU\n},\n{\nname: \"GPU_2\"\ncount: 1\nkind: KIND_GPU\n},\n{\nname: \"CPU_1\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"CPU_2\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"CPU_3\"\ncount: 1\nkind: KIND_CPU\n}" + 1, 1, + "{\nname: \"n\"\ncount: 1\nkind: KIND_GPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n}" ) # Unload model self.__unload_model() From 28efec66cacf6bc292cd91e875a099fa66656641 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Wed, 12 Jul 2023 16:34:25 -0700 Subject: [PATCH 2/5] Revert "Update and refactor test for adding name into instance signature" --- qa/L0_model_update/instance_update_test.py | 61 +++++++++++++--------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/qa/L0_model_update/instance_update_test.py b/qa/L0_model_update/instance_update_test.py index 88dba6cd4d..39f5bfc8d4 100644 --- a/qa/L0_model_update/instance_update_test.py +++ b/qa/L0_model_update/instance_update_test.py @@ -186,6 +186,27 @@ def test_gpu_instance_update(self): self.__update_instance_count(3, 0, "{\ncount: 7\nkind: KIND_GPU\n}") self.__unload_model() + # Test add/remove multiple CPU/GPU instances at a time + def test_gpu_cpu_instance_update(self): + # Load model with 1 GPU instance and 2 CPU instance + self.__load_model( + 3, + "{\ncount: 2\nkind: KIND_CPU\n},\n{\ncount: 1\nkind: KIND_GPU\n}") + # Add 2 GPU instance and remove 1 CPU instance + self.__update_instance_count( + 2, 1, + "{\ncount: 1\nkind: KIND_CPU\n},\n{\ncount: 3\nkind: KIND_GPU\n}") + # Shuffle the instances + self.__update_instance_count( + 0, 0, + "{\ncount: 3\nkind: KIND_GPU\n},\n{\ncount: 1\nkind: KIND_CPU\n}") + # Remove 1 GPU instance and add 1 CPU instance + self.__update_instance_count( + 1, 1, + "{\ncount: 2\nkind: KIND_GPU\n},\n{\ncount: 2\nkind: KIND_CPU\n}") + # Unload model + self.__unload_model() + # Test model instance name update def test_instance_name_update(self): # Load 3 
instances with 2 different names @@ -193,45 +214,37 @@ def test_instance_name_update(self): 3, "{\nname: \"old_1\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"old_2\"\ncount: 2\nkind: KIND_GPU\n}" ) - # Rename all instances - self.__update_instance_count( - 3, 3, - "{\nname: \"new_1\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"new_2\"\ncount: 2\nkind: KIND_GPU\n}" - ) - # Shuffle the instances + # Change the instance names self.__update_instance_count( 0, 0, - "{\nname: \"new_2\"\ncount: 2\nkind: KIND_GPU\n},\n{\nname: \"new_1\"\ncount: 1\nkind: KIND_CPU\n}" + "{\nname: \"new_1\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"new_2\"\ncount: 2\nkind: KIND_GPU\n}" ) - time.sleep(0.1) # larger the gap for config.pbtxt timestamp to update # Unload model self.__unload_model() - # Test multiple model instances with the same name - def test_instance_same_name(self): - # Load 2 instances with the same name + # Test instance signature grouping + def test_instance_signature(self): + # Load 2 GPU instances and 3 CPU instances self.__load_model( - 2, - "{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n}" + 5, + "{\nname: \"GPU_group\"\ncount: 2\nkind: KIND_GPU\n},\n{\nname: \"CPU_group\"\ncount: 3\nkind: KIND_CPU\n}" ) - # Remove 1 instance with the same name + # Flatten the instances representation self.__update_instance_count( - 0, 1, "{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n}") - # Add 2 instances with the same name - self.__update_instance_count( - 2, 0, - "{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n}" + 0, 0, + "{\nname: \"CPU_1\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"CPU_2_3\"\ncount: 2\nkind: KIND_CPU\n},\n{\nname: \"GPU_1\"\ncount: 1\nkind: KIND_GPU\n},\n{\nname: \"GPU_2\"\ncount: 1\nkind: KIND_GPU\n}" ) - # No change + time.sleep(0.1) # larger the gap for config.pbtxt timestamp to update + # Consolidate different representations 
self.__update_instance_count( 0, 0, - "{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n}" + "{\nname: \"CPU_group\"\ncount: 3\nkind: KIND_CPU\n},\n{\nname: \"GPU_group\"\ncount: 2\nkind: KIND_GPU\n}" ) time.sleep(0.1) # larger the gap for config.pbtxt timestamp to update - # Change instance kind but keeping the same name + # Flatten the instances representation self.__update_instance_count( - 1, 1, - "{\nname: \"n\"\ncount: 1\nkind: KIND_GPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"n\"\ncount: 1\nkind: KIND_CPU\n}" + 0, 0, + "{\nname: \"GPU_1\"\ncount: 1\nkind: KIND_GPU\n},\n{\nname: \"GPU_2\"\ncount: 1\nkind: KIND_GPU\n},\n{\nname: \"CPU_1\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"CPU_2\"\ncount: 1\nkind: KIND_CPU\n},\n{\nname: \"CPU_3\"\ncount: 1\nkind: KIND_CPU\n}" ) # Unload model self.__unload_model() From 172ddf2fa8e3bd42c399f11534ef64e91b55cb76 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Wed, 12 Jul 2023 17:05:11 -0700 Subject: [PATCH 3/5] Document may reuse an instance differ in name --- docs/user_guide/model_management.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/user_guide/model_management.md b/docs/user_guide/model_management.md index ae1c24da20..25aa742df7 100644 --- a/docs/user_guide/model_management.md +++ b/docs/user_guide/model_management.md @@ -224,6 +224,8 @@ request is received under 'config.pbtxt' is modified in place. The swap file is not part of the model configuration, so its presence in the model directory may be detected as a new file and cause the model to fully reload when only an update is expected. + * The model instance reused might carry a different name than the one provided +on the 'config.pbtxt', but the instances are equivalent beside their names. 
* If a sequence model is updated with in-flight sequence(s), Triton does not guarentee any remaining request(s) from the in-flight sequence(s) will be routed From 3e2cb7880c043bf9142876911e976e94cebc93a3 Mon Sep 17 00:00:00 2001 From: Jacky <18255193+kthui@users.noreply.github.com> Date: Thu, 13 Jul 2023 10:51:25 -0700 Subject: [PATCH 4/5] Update docs wording Co-authored-by: Neelay Shah --- docs/user_guide/model_management.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user_guide/model_management.md b/docs/user_guide/model_management.md index 25aa742df7..998710034d 100644 --- a/docs/user_guide/model_management.md +++ b/docs/user_guide/model_management.md @@ -224,7 +224,7 @@ request is received under 'config.pbtxt' is modified in place. The swap file is not part of the model configuration, so its presence in the model directory may be detected as a new file and cause the model to fully reload when only an update is expected. - * The model instance reused might carry a different name than the one provided + * When a model configuration is reloaded existing model instances will be checked against the new configuration and be reused if possible. If a model instance is reused it will retain its original name. on the 'config.pbtxt', but the instances are equivalent beside their names. * If a sequence model is updated with in-flight sequence(s), Triton does not From 9299f2a33f636421523c156286bc51dd738e5b61 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Fri, 14 Jul 2023 11:15:29 -0700 Subject: [PATCH 5/5] Fix documentation --- docs/user_guide/model_management.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/user_guide/model_management.md b/docs/user_guide/model_management.md index 998710034d..c271f3e61c 100644 --- a/docs/user_guide/model_management.md +++ b/docs/user_guide/model_management.md @@ -224,8 +224,9 @@ request is received under 'config.pbtxt' is modified in place. 
The swap file is not part of the model configuration, so its presence in the model directory may be detected as a new file and cause the model to fully reload when only an update is expected. - * When a model configuration is reloaded existing model instances will be checked against the new configuration and be reused if possible. If a model instance is reused it will retain its original name. -on the 'config.pbtxt', but the instances are equivalent beside their names. + * When a model configuration is reloaded, existing model instances will be +checked against the new configuration and be reused if possible. If a model +instance is reused, it will retain its original name. * If a sequence model is updated with in-flight sequence(s), Triton does not guarentee any remaining request(s) from the in-flight sequence(s) will be routed