From 33262a4cb8bc75a9f790a16c5247fb2dd66d71e9 Mon Sep 17 00:00:00 2001 From: GuanLuo Date: Mon, 24 Apr 2023 16:37:54 -0700 Subject: [PATCH 1/2] Modify timeout test in L0_sequence_batcher to use portable backend --- .../config.pbtxt | 39 ++++++++++++++++--- .../sequence_batcher_test.py | 31 +++++++++------ qa/L0_sequence_batcher/test.sh | 3 +- 3 files changed, 54 insertions(+), 19 deletions(-) rename qa/L0_sequence_batcher/request_timeout_models/{identity_fp32_timeout => custom_sequence_int32_timeout}/config.pbtxt (75%) diff --git a/qa/L0_sequence_batcher/request_timeout_models/identity_fp32_timeout/config.pbtxt b/qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt similarity index 75% rename from qa/L0_sequence_batcher/request_timeout_models/identity_fp32_timeout/config.pbtxt rename to qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt index f1dc316de1..84b6d62411 100644 --- a/qa/L0_sequence_batcher/request_timeout_models/identity_fp32_timeout/config.pbtxt +++ b/qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt @@ -24,22 +24,22 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -name: "identity_fp32_timeout" -backend: "python" +backend: "sequence" +default_model_filename: "libtriton_sequence.so" max_batch_size: 1 input [ { - name: "INPUT0" - data_type: TYPE_FP32 + name: "INPUT" + data_type: TYPE_INT32 dims: [ 1 ] } ] output [ { - name: "OUTPUT0" - data_type: TYPE_FP32 + name: "OUTPUT" + data_type: TYPE_INT32 dims: [ 1 ] } ] @@ -53,4 +53,31 @@ instance_group [ sequence_batching { max_sequence_idle_microseconds: 50000000 + control_input [ + { + name: "START" + control [ + { + kind: CONTROL_SEQUENCE_START + int32_false_true: [ 0, 1 ] + } + ] + }, + { + name: "READY" + control [ + { + kind: CONTROL_SEQUENCE_READY + int32_false_true: [ 0, 1 ] + } + ] + } + ] } + +parameters [ + { + key: "execute_delay_ms" + value: { string_value: "3000" } + } +] \ No newline at end of file diff --git a/qa/L0_sequence_batcher/sequence_batcher_test.py b/qa/L0_sequence_batcher/sequence_batcher_test.py index 0c30b23c5b..8d71f11c87 100644 --- a/qa/L0_sequence_batcher/sequence_batcher_test.py +++ b/qa/L0_sequence_batcher/sequence_batcher_test.py @@ -2860,15 +2860,21 @@ def setUp(self): self.server_address_ = os.environ.get('TRITONSERVER_IPADDR', 'localhost') + ":8001" - self.model_name_ = "identity_fp32_timeout" - self.tensor_data_ = np.ones(shape=[1, 1], dtype=np.float32) - self.inputs_ = [grpcclient.InferInput('INPUT0', [1, 1], "FP32")] + # Prepare input and expected output based on the model and + # the infer sequence sent for testing. If the test is to be extended + # for different sequence and model, then proper grouping should be added + self.model_name_ = "custom_sequence_int32_timeout" + self.tensor_data_ = np.ones(shape=[1, 1], dtype=np.int32) + self.inputs_ = [grpcclient.InferInput('INPUT', [1, 1], "INT32")] self.inputs_[0].set_data_from_numpy(self.tensor_data_) + self.expected_out_seq_ = [("OUTPUT", self.tensor_data_), + ("OUTPUT", self.tensor_data_ * 2), + ("OUTPUT", self.tensor_data_ * 3)] def send_sequence_with_timeout(self, seq_id, callback, - timeout_us=3000000, + timeout_us=2000000, request_pause_sec=0): with grpcclient.InferenceServerClient( self.server_address_) as triton_client: @@ -2897,8 +2903,8 @@ def test_request_timeout(self): # expect the timeout will only be expired on backlog sequence and reject # all requests of the sequence once expired. # Sending two sequences while the model can only process one sequence - # at a time. Each model execution takes 5 second and all requests have - # 3 second timeout, so the second sequence will be rejected. + # at a time. Each model execution takes 3 second and all requests have + # 2 second timeout, so the second sequence will be rejected. # correlation ID is 1-index seq1_res = [] @@ -2920,16 +2926,19 @@ def test_request_timeout(self): for t in threads: t.join() - for result, error in seq1_res: + for idx in range(len(seq1_res)): + result, error = seq1_res[idx] self.assertIsNone( error, "Expect sucessful inference for sequence 1 requests, got error: {}" .format(error)) + out = result.as_numpy(self.expected_out_seq_[idx][0]) + expected_out = self.expected_out_seq_[idx][1] np.testing.assert_allclose( - result.as_numpy("OUTPUT0"), - self.tensor_data_, - err_msg="Unexpected output tensor, got {}".format( - result.as_numpy("OUTPUT0"))) + out, + expected_out, + err_msg="Unexpected output tensor: expect {}, got {}".format( + expected_out, out)) for _, error in seq2_res: self.assertIsNotNone(error, "Expect error for sequence 2 requests") diff --git a/qa/L0_sequence_batcher/test.sh b/qa/L0_sequence_batcher/test.sh index 4a969dba0c..22d6da91bc 100755 --- a/qa/L0_sequence_batcher/test.sh +++ b/qa/L0_sequence_batcher/test.sh @@ -735,8 +735,7 @@ if [ "$TEST_SYSTEM_SHARED_MEMORY" -ne 1 ] && [ "$TEST_CUDA_SHARED_MEMORY" -ne 1 TEST_CASE=SequenceBatcherRequestTimeoutTest MODEL_PATH=request_timeout_models - mkdir -p ${MODEL_PATH}/identity_fp32_timeout/1 - cp ../python_models/identity_fp32_timeout/model.py ${MODEL_PATH}/identity_fp32_timeout/1/. + cp -r ../custom_models/custom_sequence_int32/1 ${MODEL_PATH}/custom_sequence_int32_timeout SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}" SERVER_LOG="./$TEST_CASE.$MODEL_PATH.server.log" From 6efb0fd977e0ac4c7d2df9e74ea7775078fc97ef Mon Sep 17 00:00:00 2001 From: GuanLuo Date: Tue, 25 Apr 2023 16:44:33 -0700 Subject: [PATCH 2/2] Use identity backend that is built by default on Windows --- .../config.pbtxt | 27 +++---------------- .../sequence_batcher_test.py | 8 +++--- qa/L0_sequence_batcher/test.sh | 2 +- 3 files changed, 8 insertions(+), 29 deletions(-) diff --git a/qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt b/qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt index 84b6d62411..1573534432 100644 --- a/qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt +++ b/qa/L0_sequence_batcher/request_timeout_models/custom_sequence_int32_timeout/config.pbtxt @@ -24,13 +24,12 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -backend: "sequence" -default_model_filename: "libtriton_sequence.so" +backend: "identity" max_batch_size: 1 input [ { - name: "INPUT" + name: "INPUT0" data_type: TYPE_INT32 dims: [ 1 ] } @@ -38,7 +37,7 @@ input [ output [ { - name: "OUTPUT" + name: "OUTPUT0" data_type: TYPE_INT32 dims: [ 1 ] } @@ -53,26 +52,6 @@ instance_group [ sequence_batching { max_sequence_idle_microseconds: 50000000 - control_input [ - { - name: "START" - control [ - { - kind: CONTROL_SEQUENCE_START - int32_false_true: [ 0, 1 ] - } - ] - }, - { - name: "READY" - control [ - { - kind: CONTROL_SEQUENCE_READY - int32_false_true: [ 0, 1 ] - } - ] - } - ] } parameters [ diff --git a/qa/L0_sequence_batcher/sequence_batcher_test.py b/qa/L0_sequence_batcher/sequence_batcher_test.py index 8d71f11c87..8d7b4c1fd4 100644 --- a/qa/L0_sequence_batcher/sequence_batcher_test.py +++ b/qa/L0_sequence_batcher/sequence_batcher_test.py @@ -2865,11 +2865,11 @@ def setUp(self): # for different sequence and model, then proper grouping should be added self.model_name_ = "custom_sequence_int32_timeout" self.tensor_data_ = np.ones(shape=[1, 1], dtype=np.int32) - self.inputs_ = [grpcclient.InferInput('INPUT', [1, 1], "INT32")] + self.inputs_ = [grpcclient.InferInput('INPUT0', [1, 1], "INT32")] self.inputs_[0].set_data_from_numpy(self.tensor_data_) - self.expected_out_seq_ = [("OUTPUT", self.tensor_data_), - ("OUTPUT", self.tensor_data_ * 2), - ("OUTPUT", self.tensor_data_ * 3)] + self.expected_out_seq_ = [("OUTPUT0", self.tensor_data_), + ("OUTPUT0", self.tensor_data_), + ("OUTPUT0", self.tensor_data_)] def send_sequence_with_timeout(self, seq_id, diff --git a/qa/L0_sequence_batcher/test.sh b/qa/L0_sequence_batcher/test.sh index 22d6da91bc..7d90641b0d 100755 --- a/qa/L0_sequence_batcher/test.sh +++ b/qa/L0_sequence_batcher/test.sh @@ -735,7 +735,7 @@ if [ "$TEST_SYSTEM_SHARED_MEMORY" -ne 1 ] && [ "$TEST_CUDA_SHARED_MEMORY" -ne 1 TEST_CASE=SequenceBatcherRequestTimeoutTest MODEL_PATH=request_timeout_models - cp -r ../custom_models/custom_sequence_int32/1 ${MODEL_PATH}/custom_sequence_int32_timeout + mkdir -p ${MODEL_PATH}/custom_sequence_int32_timeout/1 SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}" SERVER_LOG="./$TEST_CASE.$MODEL_PATH.server.log"