From 8c3d5fa84f5488b4ca4e365074983129913b8767 Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Mon, 17 Feb 2025 00:37:58 +0530
Subject: [PATCH 1/2] Add output_num_elements parameter to decoupled repeat example

---
 examples/decoupled/repeat_model.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/examples/decoupled/repeat_model.py b/examples/decoupled/repeat_model.py
index b626e1a5..15dc6d54 100644
--- a/examples/decoupled/repeat_model.py
+++ b/examples/decoupled/repeat_model.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -112,6 +112,12 @@ def initialize(self, args):
         self.out_dtype = pb_utils.triton_string_to_numpy(out_config["data_type"])
         self.idx_dtype = pb_utils.triton_string_to_numpy(idx_config["data_type"])
 
+        # Optional parameter to specify the number of elements in the OUT tensor in each response.
+        # Defaults to 1 if not provided. Example: If input 'IN' is [4] and 'output_num_elements' is set to 3,
+        # then 'OUT' will be [4, 4, 4]. If 'output_num_elements' is not specified, 'OUT' will default to [4].
+        parameters = self.model_config.get("parameters", {})
+        self.output_num_elements = int(parameters.get("output_num_elements", {}).get("string_value", 1))
+
         # To keep track of response threads so that we can delay
         # the finalizing the model until all response threads
         # have completed.
@@ -209,7 +215,10 @@ def response_thread(self, response_sender, in_input, delay_input):
             time.sleep(delay_value / 1000)
 
             idx_output = pb_utils.Tensor("IDX", numpy.array([idx], idx_dtype))
-            out_output = pb_utils.Tensor("OUT", numpy.array([in_value], out_dtype))
+            out_output = pb_utils.Tensor(
+                "OUT",
+                numpy.full((self.output_num_elements,), in_value, dtype=out_dtype),
+            )
             response = pb_utils.InferenceResponse(
                 output_tensors=[idx_output, out_output]
             )

From 08cb5179f6ff6cf6603a8a60e41cc336f3a9903e Mon Sep 17 00:00:00 2001
From: Sai Kiran Polisetty <spolisetty@nvidia.com>
Date: Mon, 17 Feb 2025 16:37:47 +0000
Subject: [PATCH 2/2] Reformat output_num_elements parsing in repeat example

---
 examples/decoupled/repeat_model.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/decoupled/repeat_model.py b/examples/decoupled/repeat_model.py
index 15dc6d54..b96a6804 100644
--- a/examples/decoupled/repeat_model.py
+++ b/examples/decoupled/repeat_model.py
@@ -116,7 +116,9 @@ def initialize(self, args):
         # Defaults to 1 if not provided. Example: If input 'IN' is [4] and 'output_num_elements' is set to 3,
         # then 'OUT' will be [4, 4, 4]. If 'output_num_elements' is not specified, 'OUT' will default to [4].
         parameters = self.model_config.get("parameters", {})
-        self.output_num_elements = int(parameters.get("output_num_elements", {}).get("string_value", 1))
+        self.output_num_elements = int(
+            parameters.get("output_num_elements", {}).get("string_value", 1)
+        )
 
         # To keep track of response threads so that we can delay
         # the finalizing the model until all response threads