finalize testing and make utils take float instead of only int
jbkyang-nvi committed Nov 9, 2023
1 parent cd6b206 commit 1d33f34
Showing 3 changed files with 97 additions and 51 deletions.
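For context on "make utils take float instead of only int" in the title: the gRPC client's client_timeout arguments can now be fractional seconds rather than whole integers. A minimal sketch of the new usage, assuming a Triton server listening on localhost:8001 (the sketch itself is not part of this diff):

```python
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

client = grpcclient.InferenceServerClient(url="localhost:8001")

try:
    # A sub-second deadline is now expressible because the
    # timeout is a float, not an int
    client.is_server_live(client_timeout=0.1)
except InferenceServerException as e:
    print("timed out:", e)

# A generous deadline should succeed once the server is up
assert client.is_server_live(client_timeout=5.0)
```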
99 changes: 67 additions & 32 deletions qa/L0_client_timeout/client_timeout_test.py
@@ -60,22 +60,33 @@ def setUp(self):
self.model_name_ = "custom_identity_int32"
self.input0_data_ = np.array([[10]], dtype=np.int32)
self.input0_data_byte_size_ = 32
self.SMALL_INTERVAL = sys.float_info.min # guarantees a timeout
self.NORMAL_INTERVAL = 5 # seconds for server to load then receive request
self.SMALL_INTERVAL = 0.1 # seconds for a timeout
self.INFER_SMALL_INTERVAL = 2.0 # seconds for a timeout
self.NORMAL_INTERVAL = 5.0 # seconds for server to load then receive request

def test_grpc_server_live(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
with self.assertRaises(InferenceServerException) as cm:
_ = triton_client.is_server_live(client_timeout=self.SMALL_INTERVAL)
self.assertIn("Deadline Exceeded", str(cm.exception))
self.assertTrue(
triton_client.is_server_live(client_timeout=self.NORMAL_INTERVAL)
)

def test_grpc_is_server_ready(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
with self.assertRaises(InferenceServerException) as cm:
_ = triton_client.is_server_ready(client_timeout=self.SMALL_INTERVAL)
self.assertIn("Deadline Exceeded", str(cm.exception))
# server should already be ready
self.assertTrue(
triton_client.is_server_ready(client_timeout=self.NORMAL_INTERVAL)
)

def test_grpc_model_ready(self):
def test_grpc_is_model_ready(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
@@ -90,7 +101,7 @@ def test_grpc_model_ready(self):
)
)

def test_grpc_server_metadata(self):
def test_grpc_get_server_metadata(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
@@ -100,7 +111,20 @@ def test_grpc_server_metadata(self):

triton_client.get_server_metadata(client_timeout=self.NORMAL_INTERVAL)

def test_grpc_model_config(self):
def test_grpc_get_model_metadata(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
with self.assertRaises(InferenceServerException) as cm:
_ = triton_client.get_model_metadata(
model_name=self.model_name_, client_timeout=self.SMALL_INTERVAL
)
self.assertIn("Deadline Exceeded", str(cm.exception))
triton_client.get_model_metadata(
model_name=self.model_name_, client_timeout=self.NORMAL_INTERVAL
)

def test_grpc_get_model_config(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
@@ -128,6 +152,7 @@ def test_grpc_load_model(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
triton_client.unload_model(model_name=self.model_name_)
with self.assertRaises(InferenceServerException) as cm:
_ = triton_client.load_model(
model_name=self.model_name_, client_timeout=self.SMALL_INTERVAL
@@ -155,7 +180,7 @@ def test_grpc_unload_model(self):
)
triton_client.load_model(model_name=self.model_name_)

def test_grpc_inference_statistics(self):
def test_grpc_get_inference_statistics(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
@@ -221,51 +246,70 @@ def test_grpc_get_log_settings(self):
as_json=True, client_timeout=self.NORMAL_INTERVAL
)

def test_grpc_get_system_shared_memory_status(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
with self.assertRaises(InferenceServerException) as cm:
_ = triton_client.get_system_shared_memory_status(
client_timeout=self.SMALL_INTERVAL
)
self.assertIn("Deadline Exceeded", str(cm.exception))
triton_client.get_system_shared_memory_status(
client_timeout=self.NORMAL_INTERVAL
)

def test_grpc_register_system_shared_memory(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
triton_client.unregister_system_shared_memory()
import tritonclient.utils.shared_memory as shm

shm_ip0_handle = shm.create_shared_memory_region(
"input0_data", "/input_simple", self.input0_data_byte_size_
)
shm.set_shared_memory_region(shm_ip0_handle, [self.input0_data_])
with self.assertRaises(InferenceServerException) as cm:
_ = triton_client.register_system_shared_memory(
"input_data",
"input0_data",
"/input_simple",
self.input0_data_byte_size_,
client_timeout=self.SMALL_INTERVAL,
)
self.assertIn("Deadline Exceeded", str(cm.exception))
triton_client.unregister_system_shared_memory()
triton_client.register_system_shared_memory(
"input_data",
"input0_data",
"/input_simple",
self.input0_data_byte_size_,
client_timeout=self.NORMAL_INTERVAL,
)
self.assertIn("Deadline Exceeded", str(cm.exception))
triton_client.unregister_system_shared_memory()

def test_grpc_get_system_shared_memory(self):
def test_grpc_unregister_system_shared_memory(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
with self.assertRaises(InferenceServerException) as cm:
_ = triton_client.get_system_shared_memory_status(
_ = triton_client.unregister_system_shared_memory(
client_timeout=self.SMALL_INTERVAL
)
self.assertIn("Deadline Exceeded", str(cm.exception))
triton_client.get_system_shared_memory_status(
triton_client.unregister_system_shared_memory(
client_timeout=self.NORMAL_INTERVAL
)

def test_grpc_unregister_system_shared_memory(self):
def test_grpc_get_cuda_shared_memory_status(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
with self.assertRaises(InferenceServerException) as cm:
_ = triton_client.unregister_system_shared_memory(
_ = triton_client.get_cuda_shared_memory_status(
client_timeout=self.SMALL_INTERVAL
)
self.assertIn("Deadline Exceeded", str(cm.exception))
triton_client.unregister_system_shared_memory(
client_timeout=self.NORMAL_INTERVAL
)
triton_client.get_cuda_shared_memory_status(client_timeout=self.NORMAL_INTERVAL)

def test_grpc_register_cuda_shared_memory(self):
triton_client = grpcclient.InferenceServerClient(
@@ -298,18 +342,7 @@ def test_grpc_register_cuda_shared_memory(self):
)
cshm.destroy_shared_memory_region(shm_op0_handle)

def test_grpc_get_cuda_shared_memory_status(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
with self.assertRaises(InferenceServerException) as cm:
_ = triton_client.get_cuda_shared_memory_status(
client_timeout=self.SMALL_INTERVAL
)
self.assertIn("Deadline Exceeded", str(cm.exception))
triton_client.get_cuda_shared_memory_status(client_timeout=self.NORMAL_INTERVAL)

def test_grpc_uregister_cuda_shared_memory(self):
def test_grpc_unregister_cuda_shared_memory(self):
triton_client = grpcclient.InferenceServerClient(
url="localhost:8001", verbose=True
)
@@ -379,7 +412,7 @@ def test_grpc_async_infer(self):
inputs=self.inputs_,
callback=partial(callback, user_data),
outputs=self.outputs_,
client_timeout=2,
client_timeout=self.INFER_SMALL_INTERVAL,
)
data_item = user_data._completed_requests.get()
if type(data_item) == InferenceServerException:
@@ -451,7 +484,9 @@ def test_http_infer(self):
# response. Expect an exception for small timeout values.
with self.assertRaises(socket.timeout) as cm:
triton_client = httpclient.InferenceServerClient(
url="localhost:8000", verbose=True, network_timeout=2.0
url="localhost:8000",
verbose=True,
network_timeout=self.INFER_SMALL_INTERVAL,
)
_ = triton_client.infer(
model_name=self.model_name_, inputs=self.inputs_, outputs=self.outputs_
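The system shared-memory tests above all exercise the same client flow; a condensed sketch of that flow follows, with region names and byte sizes taken from the test and the server address assumed:

```python
import numpy as np
import tritonclient.grpc as grpcclient
import tritonclient.utils.shared_memory as shm

client = grpcclient.InferenceServerClient(url="localhost:8001")
input0_data = np.array([[10]], dtype=np.int32)
byte_size = 32

# Create a system shared-memory region and copy the input into it
handle = shm.create_shared_memory_region("input0_data", "/input_simple", byte_size)
shm.set_shared_memory_region(handle, [input0_data])

# Register the region with the server, then clean up
client.register_system_shared_memory(
    "input0_data", "/input_simple", byte_size, client_timeout=5.0
)
client.unregister_system_shared_memory()
shm.destroy_shared_memory_region(handle)
```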
44 changes: 27 additions & 17 deletions qa/L0_client_timeout/test.sh
@@ -40,6 +40,7 @@ fi

export CUDA_VISIBLE_DEVICES=0
TIMEOUT_VALUE=100000000
SHORT_TIMEOUT_VALUE=1000
RET=0

CLIENT_TIMEOUT_TEST=client_timeout_test.py
@@ -53,11 +54,13 @@ CLIENT_LOG=`pwd`/client.log
CLIENT_GRPC_TIMEOUTS_LOG=`pwd`/client.log.grpc
DATADIR=`pwd`/models
SERVER=/opt/tritonserver/bin/tritonserver
SERVER_ARGS="--model-repository=$DATADIR --model-control-mode=explicit"
SERVER_ARGS="--model-repository=$DATADIR --model-control-mode=explicit --load-model=custom_identity_int32 --log-verbose 2"
source ../common/util.sh

mkdir -p $DATADIR/custom_identity_int32/1

# Test all APIs apart from Infer.
export TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC=1
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
@@ -66,10 +69,9 @@ if [ "$SERVER_PID" == "0" ]; then
fi

set +e

# Test all APIs apart from Infer
# Expect timeout for everything
sed -i 's#value: { string_value: "0" }#value: { string_value: "1" }#' $DATADIR/custom_identity_int32/config.pbtxt
$CLIENT_TIMEOUT_TEST_CPP -t 1000 -v -i grpc -p >> ${CLIENT_LOG}.c++.grpc_non_infer_apis 2>&1
$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -i grpc -p >> ${CLIENT_LOG}.c++.grpc_non_infer_apis 2>&1
if [ $? -eq 0 ]; then
RET=1
fi
@@ -78,7 +80,6 @@ if [ `grep -c "Deadline Exceeded" ${CLIENT_LOG}.c++.grpc_non_infer_apis` != "1"
echo -e "\n***\n*** Test Failed\n***"
RET=1
fi

# Test all APIs with long timeout
$CLIENT_TIMEOUT_TEST_CPP -t $TIMEOUT_VALUE -v -i grpc -p >> ${CLIENT_LOG} 2>&1
if [ $? -eq 1 ]; then
@@ -91,6 +92,7 @@ kill $SERVER_PID
wait $SERVER_PID

# Test infer APIs
export TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC=
SERVER_ARGS="--model-repository=$DATADIR"
sed -i 's#value: { string_value: "1" }#value: { string_value: "0" }#' $DATADIR/custom_identity_int32/config.pbtxt
run_server
@@ -105,7 +107,7 @@ set +e
# Note: the custom_identity_int32 is configured with a delay
# of 3 sec.
# Test request timeout in grpc synchronous inference
$CLIENT_TIMEOUT_TEST_CPP -t 1000 -v -i grpc >> ${CLIENT_LOG}.c++.grpc_infer 2>&1
$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -i grpc >> ${CLIENT_LOG}.c++.grpc_infer 2>&1
if [ $? -eq 0 ]; then
RET=1
fi
@@ -116,7 +118,7 @@ if [ `grep -c "Deadline Exceeded" ${CLIENT_LOG}.c++.grpc_infer` != "1" ]; then
fi

# Test request timeout in grpc asynchronous inference
$CLIENT_TIMEOUT_TEST_CPP -t 1000 -v -i grpc -a >> ${CLIENT_LOG}.c++.grpc_async_infer 2>&1
$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -i grpc -a >> ${CLIENT_LOG}.c++.grpc_async_infer 2>&1
if [ $? -eq 0 ]; then
RET=1
fi
@@ -127,7 +129,7 @@ if [ `grep -c "Deadline Exceeded" ${CLIENT_LOG}.c++.grpc_async_infer` != "1" ];
fi

# Test stream timeout in grpc asynchronous streaming inference
$CLIENT_TIMEOUT_TEST_CPP -t 1000 -v -i grpc -s >> ${CLIENT_LOG}.c++.grpc_async_stream_infer 2>&1
$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -i grpc -s >> ${CLIENT_LOG}.c++.grpc_async_stream_infer 2>&1
if [ $? -eq 0 ]; then
RET=1
fi
@@ -138,7 +140,7 @@ if [ `grep -c "Stream has been closed" ${CLIENT_LOG}.c++.grpc_async_stream_infer
fi

# Test request timeout in http synchronous inference
$CLIENT_TIMEOUT_TEST_CPP -t 1000 -v >> ${CLIENT_LOG}.c++.http_infer 2>&1
$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v >> ${CLIENT_LOG}.c++.http_infer 2>&1
if [ $? -eq 0 ]; then
RET=1
fi
@@ -150,7 +152,7 @@ fi


# Test request timeout in http asynchronous inference
$CLIENT_TIMEOUT_TEST_CPP -t 1000 -v -a >> ${CLIENT_LOG}.c++.http_async_infer 2>&1
$CLIENT_TIMEOUT_TEST_CPP -t $SHORT_TIMEOUT_VALUE -v -a >> ${CLIENT_LOG}.c++.http_async_infer 2>&1
if [ $? -eq 0 ]; then
RET=1
fi
@@ -238,7 +240,8 @@ kill $SERVER_PID
wait $SERVER_PID

# Test all APIs other than infer
SERVER_ARGS="${SERVER_ARGS} --model-control-mode=explicit"
export TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC=1
SERVER_ARGS="${SERVER_ARGS} --model-control-mode=explicit --load-model=custom_identity_int32 --log-verbose 2"
sed -i 's#value: { string_value: "0" }#value: { string_value: "1" }#' $DATADIR/custom_identity_int32/config.pbtxt
run_server
if [ "$SERVER_PID" == "0" ]; then
@@ -247,20 +250,27 @@ if [ "$SERVER_PID" == "0" ]; then
exit 1
fi
set +e
export TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC=1
for i in test_grpc_load_model \

for i in test_grpc_server_live \
test_grpc_is_server_ready \
test_grpc_is_model_ready \
test_grpc_get_server_metadata \
test_grpc_get_model_metadata \
test_grpc_get_model_config \
test_grpc_model_repository_index \
test_grpc_load_model \
test_grpc_unload_model \
test_grpc_inference_statistics \
test_grpc_get_inference_statistics \
test_grpc_update_trace_settings \
test_grpc_get_trace_settings \
test_grpc_update_log_settings \
test_grpc_get_log_settings \
test_grpc_get_system_shared_memory_status \
test_grpc_register_system_shared_memory \
test_grpc_get_system_shared_memory \
test_grpc_unregister_system_shared_memory \
test_grpc_register_cuda_shared_memory \
test_grpc_get_cuda_shared_memory_status \
test_grpc_uregister_cuda_shared_memory \
test_grpc_register_cuda_shared_memory \
test_grpc_unregister_cuda_shared_memory \
; do
python $CLIENT_TIMEOUT_TEST ClientTimeoutTest.$i >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
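The script drives the server-side delay through the TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC environment variable, which the gRPC frontend appears to consume when writing responses (note the <cstdlib> include and the delay_response variable in grpc_utils.cc below). A sketch of how a harness might launch the server with the delay enabled; the binary path and flags mirror test.sh, the rest is illustrative:

```python
import os
import subprocess

# Ask the gRPC frontend to delay each response write by 1 second
env = dict(os.environ)
env["TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC"] = "1"

server = subprocess.Popen(
    [
        "/opt/tritonserver/bin/tritonserver",
        "--model-repository=models",
        "--model-control-mode=explicit",
        "--load-model=custom_identity_int32",
        "--log-verbose", "2",
    ],
    env=env,
)
```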
5 changes: 3 additions & 2 deletions src/grpc/grpc_utils.cc
@@ -27,6 +27,7 @@
#include "grpc_utils.h"

#include <chrono>
#include <cstdlib>
#include <thread>

namespace triton { namespace server { namespace grpc {
@@ -83,8 +84,8 @@ GrpcStatusUtil::Create(::grpc::Status* status, TRITONSERVER_Error* err)
// Will delay the write of the response by the specified time.
// This can be used to test the flow where there are other
// responses available to be written.
LOG_INFO << "Delaying the write of the response by " << delay_response
<< " seconds";
LOG_VERBOSE(1) << "Delaying the write of the response by " << delay_response
<< " seconds";
std::this_thread::sleep_for(std::chrono::seconds(delay_response));
}

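With the 1-second delay active, each non-infer RPC completes just after a second, so SMALL_INTERVAL (0.1 s) trips the gRPC deadline while NORMAL_INTERVAL (5.0 s) clears it. A sketch of that interplay, assuming a server started with the delay as above:

```python
import tritonclient.grpc as grpcclient
from tritonclient.utils import InferenceServerException

client = grpcclient.InferenceServerClient(url="localhost:8001")

# The delayed response arrives after ~1 s: a 0.1 s deadline fails ...
try:
    client.get_server_metadata(client_timeout=0.1)
except InferenceServerException as e:
    assert "Deadline Exceeded" in str(e)

# ... while a 5 s deadline succeeds
client.get_server_metadata(client_timeout=5.0)
```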
