diff --git a/qa/L0_client_timeout/client_timeout_test.py b/qa/L0_client_timeout/client_timeout_test.py index c0ea76bc8b..1f7ac7ae0f 100755 --- a/qa/L0_client_timeout/client_timeout_test.py +++ b/qa/L0_client_timeout/client_timeout_test.py @@ -60,22 +60,33 @@ def setUp(self): self.model_name_ = "custom_identity_int32" self.input0_data_ = np.array([[10]], dtype=np.int32) self.input0_data_byte_size_ = 32 - self.SMALL_INTERVAL = sys.float_info.min # guarantees a timeout - self.NORMAL_INTERVAL = 5 # seconds for server to load then receive request + self.SMALL_INTERVAL = 0.1 # seconds for a timeout + self.INFER_SMALL_INTERVAL = 2.0 # seconds for a timeout + self.NORMAL_INTERVAL = 5.0 # seconds for server to load then receive request def test_grpc_server_live(self): + triton_client = grpcclient.InferenceServerClient( + url="localhost:8001", verbose=True + ) + with self.assertRaises(InferenceServerException) as cm: + _ = triton_client.is_server_live(client_timeout=self.SMALL_INTERVAL) + self.assertIn("Deadline Exceeded", str(cm.exception)) + self.assertTrue( + triton_client.is_server_live(client_timeout=self.NORMAL_INTERVAL) + ) + + def test_grpc_is_server_ready(self): triton_client = grpcclient.InferenceServerClient( url="localhost:8001", verbose=True ) with self.assertRaises(InferenceServerException) as cm: _ = triton_client.is_server_ready(client_timeout=self.SMALL_INTERVAL) self.assertIn("Deadline Exceeded", str(cm.exception)) - # server should already be ready self.assertTrue( triton_client.is_server_ready(client_timeout=self.NORMAL_INTERVAL) ) - def test_grpc_model_ready(self): + def test_grpc_is_model_ready(self): triton_client = grpcclient.InferenceServerClient( url="localhost:8001", verbose=True ) @@ -90,7 +101,7 @@ def test_grpc_model_ready(self): ) ) - def test_grpc_server_metadata(self): + def test_grpc_get_server_metadata(self): triton_client = grpcclient.InferenceServerClient( url="localhost:8001", verbose=True ) @@ -100,7 +111,20 @@ def test_grpc_server_metadata(self): triton_client.get_server_metadata(client_timeout=self.NORMAL_INTERVAL) - def test_grpc_model_config(self): + def test_grpc_get_model_metadata(self): + triton_client = grpcclient.InferenceServerClient( + url="localhost:8001", verbose=True + ) + with self.assertRaises(InferenceServerException) as cm: + _ = triton_client.get_model_metadata( + model_name=self.model_name_, client_timeout=self.SMALL_INTERVAL + ) + self.assertIn("Deadline Exceeded", str(cm.exception)) + triton_client.get_model_metadata( + model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL + ) + + def test_grpc_get_model_config(self): triton_client = grpcclient.InferenceServerClient( url="localhost:8001", verbose=True ) @@ -128,6 +152,7 @@ def test_grpc_load_model(self): triton_client = grpcclient.InferenceServerClient( url="localhost:8001", verbose=True ) + triton_client.unload_model(model_name=self.model_name_) with self.assertRaises(InferenceServerException) as cm: _ = triton_client.load_model( model_name=self.model_name_, client_timeout=self.SMALL_INTERVAL @@ -155,7 +180,7 @@ def test_grpc_unload_model(self): ) triton_client.load_model(model_name=self.model_name_) - def test_grpc_inference_statistics(self): + def test_grpc_get_inference_statistics(self): triton_client = grpcclient.InferenceServerClient( url="localhost:8001", verbose=True ) @@ -221,51 +246,70 @@ def test_grpc_get_log_settings(self): as_json=True, client_timeout=self.NORMAL_INTERVAL ) + def test_grpc_get_system_shared_memory_status(self): + triton_client = grpcclient.InferenceServerClient( + url="localhost:8001", verbose=True + ) + with self.assertRaises(InferenceServerException) as cm: + _ = triton_client.get_system_shared_memory_status( + client_timeout=self.SMALL_INTERVAL + ) + self.assertIn("Deadline Exceeded", str(cm.exception)) + triton_client.get_system_shared_memory_status( + client_timeout=self.NORMAL_INTERVAL + ) + def test_grpc_register_system_shared_memory(self): triton_client = grpcclient.InferenceServerClient( url="localhost:8001", verbose=True ) + triton_client.unregister_system_shared_memory() + import tritonclient.utils.shared_memory as shm + shm_ip0_handle = shm.create_shared_memory_region( + "input0_data", "/input_simple", self.input0_data_byte_size_ + ) + shm.set_shared_memory_region(shm_ip0_handle, [self.input0_data_]) with self.assertRaises(InferenceServerException) as cm: _ = triton_client.register_system_shared_memory( - "input_data", + "input0_data", "/input_simple", self.input0_data_byte_size_, client_timeout=self.SMALL_INTERVAL, ) + self.assertIn("Deadline Exceeded", str(cm.exception)) + triton_client.unregister_system_shared_memory() triton_client.register_system_shared_memory( - "input_data", + "input0_data", "/input_simple", self.input0_data_byte_size_, client_timeout=self.NORMAL_INTERVAL, ) - self.assertIn("Deadline Exceeded", str(cm.exception)) + triton_client.unregister_system_shared_memory() - def test_grpc_get_system_shared_memory(self): + def test_grpc_unregister_system_shared_memory(self): triton_client = grpcclient.InferenceServerClient( url="localhost:8001", verbose=True ) with self.assertRaises(InferenceServerException) as cm: - _ = triton_client.get_system_shared_memory_status( + _ = triton_client.unregister_system_shared_memory( client_timeout=self.SMALL_INTERVAL ) self.assertIn("Deadline Exceeded", str(cm.exception)) - triton_client.get_system_shared_memory_status( + triton_client.unregister_system_shared_memory( client_timeout=self.NORMAL_INTERVAL ) - def test_grpc_unregister_system_shared_memory(self): + def test_grpc_get_cuda_shared_memory_status(self): triton_client = grpcclient.InferenceServerClient( url="localhost:8001", verbose=True ) with self.assertRaises(InferenceServerException) as cm: - _ = triton_client.unregister_system_shared_memory( + _ = triton_client.get_cuda_shared_memory_status( client_timeout=self.SMALL_INTERVAL ) self.assertIn("Deadline Exceeded", str(cm.exception)) - triton_client.unregister_system_shared_memory( - client_timeout=self.NORMAL_INTERVAL - ) + triton_client.get_cuda_shared_memory_status(client_timeout=self.NORMAL_INTERVAL) def test_grpc_register_cuda_shared_memory(self): triton_client = grpcclient.InferenceServerClient( @@ -298,18 +342,7 @@ def test_grpc_register_cuda_shared_memory(self): ) cshm.destroy_shared_memory_region(shm_op0_handle) - def test_grpc_get_cuda_shared_memory_status(self): - triton_client = grpcclient.InferenceServerClient( - url="localhost:8001", verbose=True - ) - with self.assertRaises(InferenceServerException) as cm: - _ = triton_client.get_cuda_shared_memory_status( - client_timeout=self.SMALL_INTERVAL - ) - self.assertIn("Deadline Exceeded", str(cm.exception)) - triton_client.get_cuda_shared_memory_status(client_timeout=self.NORMAL_INTERVAL) - - def test_grpc_uregister_cuda_shared_memory(self): + def test_grpc_unregister_cuda_shared_memory(self): triton_client = grpcclient.InferenceServerClient( url="localhost:8001", verbose=True ) @@ -379,7 +412,7 @@ def test_grpc_async_infer(self): inputs=self.inputs_, callback=partial(callback, user_data), outputs=self.outputs_, - client_timeout=2, + client_timeout=self.INFER_SMALL_INTERVAL, ) data_item = user_data._completed_requests.get() if type(data_item) == InferenceServerException: @@ -451,7 +484,9 @@ def test_http_infer(self): # response. Expect an exception for small timeout values. with self.assertRaises(socket.timeout) as cm: triton_client = httpclient.InferenceServerClient( - url="localhost:8000", verbose=True, network_timeout=2.0 + url="localhost:8000", + verbose=True, + network_timeout=self.INFER_SMALL_INTERVAL, ) _ = triton_client.infer( model_name=self.model_name_, inputs=self.inputs_, outputs=self.outputs_ diff --git a/qa/L0_client_timeout/test.sh b/qa/L0_client_timeout/test.sh index fe0562bb10..518c384235 100755 --- a/qa/L0_client_timeout/test.sh +++ b/qa/L0_client_timeout/test.sh @@ -40,6 +40,7 @@ fi export CUDA_VISIBLE_DEVICES=0 TIMEOUT_VALUE=100000000 +SHORT_TIMEOUT_VALUE=1000 RET=0 CLIENT_TIMEOUT_TEST=client_timeout_test.py @@ -53,11 +54,13 @@ CLIENT_LOG=`pwd`/client.log CLIENT_GRPC_TIMEOUTS_LOG=`pwd`/client.log.grpc DATADIR=`pwd`/models SERVER=/opt/tritonserver/bin/tritonserver -SERVER_ARGS="--model-repository=$DATADIR --model-control-mode=explicit" +SERVER_ARGS="--model-repository=$DATADIR --model-control-mode=explicit --load-model=custom_identity_int32 --log-verbose 2" source ../common/util.sh mkdir -p $DATADIR/custom_identity_int32/1 +# Test all APIs apart from Infer. +export TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC=1 run_server if [ "$SERVER_PID" == "0" ]; then echo -e "\n***\n*** Failed to start $SERVER\n***" @@ -66,10 +69,9 @@ if [ "$SERVER_PID" == "0" ]; then fi set +e - -# Test all APIs apart from Infer +# Expect timeout for everything sed -i 's#value: { string_value: "0" }#value: { string_value: "1" }#' $DATADIR/custom_identity_int32/config.pbtxt -$CLIENT_TIMEOUT_TEST_CPP -t 1000 -v -i grpc -p >> ${CLIENT_LOG}.c++.grpc_non_infer_apis 2>&1 +$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -i grpc -p >> ${CLIENT_LOG}.c++.grpc_non_infer_apis 2>&1 if [ $? -eq 0 ]; then RET=1 fi @@ -78,7 +80,6 @@ if [ `grep -c "Deadline Exceeded" ${CLIENT_LOG}.c++.grpc_non_infer_apis` != "1" echo -e "\n***\n*** Test Failed\n***" RET=1 fi - # Test all APIs with long timeout $CLIENT_TIMEOUT_TEST_CPP -t $TIMEOUT_VALUE -v -i grpc -p >> ${CLIENT_LOG} 2>&1 if [ $? -eq 1 ]; then @@ -91,6 +92,7 @@ kill $SERVER_PID wait $SERVER_PID # Test infer APIs +export TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC= SERVER_ARGS="--model-repository=$DATADIR" sed -i 's#value: { string_value: "1" }#value: { string_value: "0" }#' $DATADIR/custom_identity_int32/config.pbtxt run_server @@ -105,7 +107,7 @@ set +e # Note, the custom_identity_int32 is configured with a delay # of 3 sec. # Test request timeout in grpc synchronous inference -$CLIENT_TIMEOUT_TEST_CPP -t 1000 -v -i grpc >> ${CLIENT_LOG}.c++.grpc_infer 2>&1 +$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -i grpc >> ${CLIENT_LOG}.c++.grpc_infer 2>&1 if [ $? -eq 0 ]; then RET=1 fi @@ -116,7 +118,7 @@ if [ `grep -c "Deadline Exceeded" ${CLIENT_LOG}.c++.grpc_infer` != "1" ]; then fi # Test request timeout in grpc asynchronous inference -$CLIENT_TIMEOUT_TEST_CPP -t 1000 -v -i grpc -a >> ${CLIENT_LOG}.c++.grpc_async_infer 2>&1 +$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -i grpc -a >> ${CLIENT_LOG}.c++.grpc_async_infer 2>&1 if [ $? -eq 0 ]; then RET=1 fi @@ -127,7 +129,7 @@ if [ `grep -c "Deadline Exceeded" ${CLIENT_LOG}.c++.grpc_async_infer` != "1" ]; fi # Test stream timeout in grpc asynchronous streaming inference -$CLIENT_TIMEOUT_TEST_CPP -t 1000 -v -i grpc -s >> ${CLIENT_LOG}.c++.grpc_async_stream_infer 2>&1 +$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -i grpc -s >> ${CLIENT_LOG}.c++.grpc_async_stream_infer 2>&1 if [ $? -eq 0 ]; then RET=1 fi @@ -138,7 +140,7 @@ if [ `grep -c "Stream has been closed" ${CLIENT_LOG}.c++.grpc_async_stream_infer fi # Test request timeout in http synchronous inference -$CLIENT_TIMEOUT_TEST_CPP -t 1000 -v >> ${CLIENT_LOG}.c++.http_infer 2>&1 +$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v >> ${CLIENT_LOG}.c++.http_infer 2>&1 if [ $? -eq 0 ]; then RET=1 fi @@ -150,7 +152,7 @@ fi # Test request timeout in http asynchronous inference -$CLIENT_TIMEOUT_TEST_CPP -t 1000 -v -a >> ${CLIENT_LOG}.c++.http_async_infer 2>&1 +$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -a >> ${CLIENT_LOG}.c++.http_async_infer 2>&1 if [ $? -eq 0 ]; then RET=1 fi @@ -238,7 +240,8 @@ kill $SERVER_PID wait $SERVER_PID # Test all APIs other than infer -SERVER_ARGS="${SERVER_ARGS} --model-control-mode=explicit" +export TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC=1 +SERVER_ARGS="${SERVER_ARGS} --model-control-mode=explicit --load-model=custom_identity_int32 --log-verbose 2" sed -i 's#value: { string_value: "0" }#value: { string_value: "1" }#' $DATADIR/custom_identity_int32/config.pbtxt run_server if [ "$SERVER_PID" == "0" ]; then @@ -247,20 +250,27 @@ if [ "$SERVER_PID" == "0" ]; then exit 1 fi set +e -export TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC=1 -for i in test_grpc_load_model \ + +for i in test_grpc_server_live \ + test_grpc_is_server_ready \ + test_grpc_is_model_ready \ + test_grpc_get_server_metadata \ + test_grpc_get_model_metadata \ + test_grpc_get_model_config \ + test_grpc_model_repository_index \ + test_grpc_load_model \ test_grpc_unload_model \ - test_grpc_inference_statistics \ + test_grpc_get_inference_statistics \ test_grpc_update_trace_settings \ test_grpc_get_trace_settings \ test_grpc_update_log_settings \ test_grpc_get_log_settings \ + test_grpc_get_system_shared_memory_status \ test_grpc_register_system_shared_memory \ - test_grpc_get_system_shared_memory \ test_grpc_unregister_system_shared_memory \ - test_grpc_register_cuda_shared_memory \ test_grpc_get_cuda_shared_memory_status \ - test_grpc_uregister_cuda_shared_memory \ + test_grpc_register_cuda_shared_memory \ + test_grpc_unregister_cuda_shared_memory \ ; do python $CLIENT_TIMEOUT_TEST ClientTimeoutTest.$i >>$CLIENT_LOG 2>&1 if [ $? -ne 0 ]; then diff --git a/src/grpc/grpc_utils.cc b/src/grpc/grpc_utils.cc index cdd8b5cf8e..62fd93272b 100644 --- a/src/grpc/grpc_utils.cc +++ b/src/grpc/grpc_utils.cc @@ -27,6 +27,7 @@ #include "grpc_utils.h" #include +#include #include namespace triton { namespace server { namespace grpc { @@ -83,8 +84,8 @@ GrpcStatusUtil::Create(::grpc::Status* status, TRITONSERVER_Error* err) // Will delay the write of the response by the specified time. // This can be used to test the flow where there are other // responses available to be written. - LOG_INFO << "Delaying the write of the response by " << delay_response - << " seconds"; + LOG_VERBOSE(1) << "Delaying the write of the response by " << delay_response + << " seconds"; std::this_thread::sleep_for(std::chrono::seconds(delay_response)); }