From bb6ad6058fc09a6a378d056d5b5cc9a299aa8f26 Mon Sep 17 00:00:00 2001
From: Swetha B S
Date: Sat, 24 Sep 2022 09:47:05 -0400
Subject: [PATCH] ResizeTensor addition

---
 amd_openvx_extensions/amd_rpp/CMakeLists.txt  |   1 +
 .../amd_rpp/include/internal_publishKernels.h |   2 +
 .../amd_rpp/include/kernels_rpp.h             |   3 +-
 .../amd_rpp/include/vx_ext_rpp.h              |   1 +
 .../amd_rpp/source/Resizetensor.cpp           | 397 ++++++++++++++++++
 .../source/internal_publishKernels.cpp        |   1 +
 .../amd_rpp/source/kernel_rpp.cpp             |  25 ++
 .../geometry_augmentations/node_resize.cpp    |   4 +-
 rocAL/rocAL_pybind/amd/rocal/decoders.py      |  18 +-
 9 files changed, 440 insertions(+), 12 deletions(-)
 create mode 100644 amd_openvx_extensions/amd_rpp/source/Resizetensor.cpp

diff --git a/amd_openvx_extensions/amd_rpp/CMakeLists.txt b/amd_openvx_extensions/amd_rpp/CMakeLists.txt
index e3e7cb3637..0b6669fc6a 100644
--- a/amd_openvx_extensions/amd_rpp/CMakeLists.txt
+++ b/amd_openvx_extensions/amd_rpp/CMakeLists.txt
@@ -102,6 +102,7 @@ list(APPEND SOURCES
     source/RandomShadowbatchPD.cpp
     source/Remap.cpp
     source/ResizebatchPD.cpp
+    source/Resizetensor.cpp
     source/ResizeCropbatchPD.cpp
     source/ResizeCropMirrorPD.cpp
     source/RotatebatchPD.cpp
diff --git a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
index 21c47552df..7cc4ebc46b 100644
--- a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
+++ b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h
@@ -115,6 +115,7 @@ vx_status ThresholdingbatchPD_Register(vx_context);
 vx_status VignettebatchPD_Register(vx_context);
 vx_status WarpAffinebatchPD_Register(vx_context);
 vx_status WarpPerspectivebatchPD_Register(vx_context);
+vx_status Resizetensor_Register(vx_context);

 // kernel names
 #define VX_KERNEL_RPP_NOP_NAME "org.rpp.Nop"
@@ -197,5 +198,6 @@
 #define VX_KERNEL_RPP_CROPPD_NAME "org.rpp.CropPD"
 #define VX_KERNEL_RPP_RESIZECROPMIRRORPD_NAME "org.rpp.ResizeCropMirrorPD"
 #define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME "org.rpp.SequenceRearrange"
+#define VX_KERNEL_RPP_RESIZETENSOR_NAME "org.rpp.Resizetensor"

 #endif //_AMDVX_EXT__PUBLISH_KERNELS_H_
diff --git a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
index cc54082427..644e735083 100644
--- a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
+++ b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
@@ -111,7 +111,8 @@ extern "C"
         VX_KERNEL_RPP_TENSORLOOKUP = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4e,
         VX_KERNEL_RPP_VIGNETTEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x4f,
         VX_KERNEL_RPP_WARPAFFINEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x50,
-        VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x51
+        VX_KERNEL_RPP_WARPPERSPECTIVEBATCHPD = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x51,
+        VX_KERNEL_RPP_RESIZETENSOR = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x52
     };

 #ifdef __cplusplus
diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
index 4b9ad6d03c..494365d03c 100644
--- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
+++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
@@ -126,6 +126,7 @@ extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_VignettebatchPD(vx_gr
 extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpAffinebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array affine,vx_uint32 nbatchSize);
 extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_WarpPerspectivebatchPD(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_array perspective,vx_uint32 nbatchSize);
 extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_SequenceRearrange(vx_graph graph,vx_image pSrc,vx_image pDst, vx_array newOrder,vx_uint32 newSequenceLength, vx_uint32 sequenceLength, vx_uint32 sequenceCount);
+extern "C" SHARED_PUBLIC vx_node VX_API_CALL vxExtrppNode_Resizetensor(vx_graph graph,vx_image pSrc,vx_array srcImgWidth,vx_array srcImgHeight,vx_image pDst,vx_array dstImgWidth,vx_array dstImgHeight,vx_int32 interpolation_type,vx_uint32 nbatchSize);

 #ifdef __cplusplus
 }
 #endif
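A quick orientation before the new kernel source: the prototype just added to vx_ext_rpp.h is the patch's only new public entry point. A minimal sketch of how a client graph might invoke it follows — the object names, the 224x224 dimensions, and the interpolation code 1 (bilinear, per the switch in the initialize callback later in this patch) are illustrative assumptions, not part of the patch:

    // Sketch only: assumes a valid vx_context and the amd_rpp extension loaded.
    vx_uint32 batchSize = 4;
    vx_graph graph = vxCreateGraph(context);
    // Batched images are stacked vertically, so the vx_image height is the
    // per-image max height times the batch size (see the height / nbatchSize
    // division in initializeResizetensor below).
    vx_image src = vxCreateImage(context, 224, 224 * batchSize, VX_DF_IMAGE_RGB);
    vx_image dst = vxCreateImage(context, 112, 112 * batchSize, VX_DF_IMAGE_RGB);
    vx_array srcW = vxCreateArray(context, VX_TYPE_UINT32, batchSize);
    vx_array srcH = vxCreateArray(context, VX_TYPE_UINT32, batchSize);
    vx_array dstW = vxCreateArray(context, VX_TYPE_UINT32, batchSize);
    vx_array dstH = vxCreateArray(context, VX_TYPE_UINT32, batchSize);
    // ... fill the four arrays with per-image widths/heights via vxAddArrayItems ...
    vx_node resize = vxExtrppNode_Resizetensor(graph, src, srcW, srcH,
                                               dst, dstW, dstH,
                                               1 /* bilinear */, batchSize);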
diff --git a/amd_openvx_extensions/amd_rpp/source/Resizetensor.cpp b/amd_openvx_extensions/amd_rpp/source/Resizetensor.cpp
new file mode 100644
index 0000000000..7c274798c2
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/Resizetensor.cpp
@@ -0,0 +1,397 @@
+/*
+Copyright (c) 2019 - 2022 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+struct ResizetensorLocalData
+{
+    RPPCommonHandle handle;
+    rppHandle_t rppHandle;
+    Rpp32u device_type;
+    Rpp32u nbatchSize;
+    RppiSize *srcDimensions;
+    RppiSize maxSrcDimensions;
+    RppiSize *dstDimensions;
+    RppiSize maxDstDimensions;
+    RppPtr_t pSrc;
+    RppPtr_t pDst;
+    Rpp32u *srcBatch_width;
+    Rpp32u *srcBatch_height;
+    Rpp32u *dstBatch_width;
+    Rpp32u *dstBatch_height;
+    RpptDescPtr srcDescPtr, dstDescPtr;
+    RpptROIPtr roiTensorPtrSrc;
+    RpptRoiType roiType;
+    RpptImagePatchPtr dstImgSize;
+    RpptDesc srcDesc, dstDesc;
+    RpptInterpolationType interpolation_type;
+#if ENABLE_OPENCL
+    cl_mem cl_pSrc;
+    cl_mem cl_pDst;
+#elif ENABLE_HIP
+    void *hip_pSrc;
+    void *hip_pDst;
+    RpptImagePatch *hip_dstImgSize;
+    RpptROI *hip_roiTensorPtrSrc;
+#endif
+};
+
+static vx_status VX_CALLBACK refreshResizetensor(vx_node node, const vx_reference *parameters, vx_uint32 num, ResizetensorLocalData *data)
+{
+    vx_status status = VX_SUCCESS;
+    STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[1], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[2], 0, data->nbatchSize, sizeof(Rpp32u), data->srcBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[5], 0, data->nbatchSize, sizeof(Rpp32u), data->dstBatch_height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    for (int i = 0; i < data->nbatchSize; i++)
+    {
+        data->srcDimensions[i].width = data->roiTensorPtrSrc[i].xywhROI.roiWidth = data->srcBatch_width[i];
+        data->srcDimensions[i].height = data->roiTensorPtrSrc[i].xywhROI.roiHeight = data->srcBatch_height[i];
+        data->dstDimensions[i].width = data->dstImgSize[i].width = data->dstBatch_width[i];
+        data->dstDimensions[i].height = data->dstImgSize[i].height = data->dstBatch_height[i];
+        data->roiTensorPtrSrc[i].xywhROI.xy.x = 0;
+        data->roiTensorPtrSrc[i].xywhROI.xy.y = 0;
+    }
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+    {
+#if ENABLE_OPENCL
+        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pSrc, sizeof(data->cl_pSrc)));
+        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_OPENCL_BUFFER, &data->cl_pDst, sizeof(data->cl_pDst)));
+#elif ENABLE_HIP
+        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pSrc, sizeof(data->hip_pSrc)));
+        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HIP_BUFFER, &data->hip_pDst, sizeof(data->hip_pDst)));
+        hipMemcpy(data->hip_dstImgSize, data->dstImgSize, data->nbatchSize * sizeof(RpptImagePatch), hipMemcpyHostToDevice);
+        hipMemcpy(data->hip_roiTensorPtrSrc, data->roiTensorPtrSrc, data->nbatchSize * sizeof(RpptROI), hipMemcpyHostToDevice);
+#endif
+    }
+    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+    {
+        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pSrc, sizeof(vx_uint8)));
+        STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_ATTRIBUTE_AMD_HOST_BUFFER, &data->pDst, sizeof(vx_uint8)));
+    }
+    return status;
+}
+
+static vx_status VX_CALLBACK validateResizetensor(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[])
+{
+    vx_status status = VX_SUCCESS;
+    vx_enum scalar_type;
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_INT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_UINT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != VX_TYPE_UINT32)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);
+    // Check for input parameters
+    vx_parameter input_param;
+    vx_image input;
+    vx_df_image df_image;
+    input_param = vxGetParameterByIndex(node, 0);
+    STATUS_ERROR_CHECK(vxQueryParameter(input_param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(vx_image)));
+    STATUS_ERROR_CHECK(vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+    if (df_image != VX_DF_IMAGE_U8 && df_image != VX_DF_IMAGE_RGB)
+    {
+        return ERRMSG(VX_ERROR_INVALID_FORMAT, "validate: Resizetensor: image: #0 format=%4.4s (must be RGB2 or U008)\n", (char *)&df_image);
+    }
+
+    // Check for output parameters
+    vx_image output;
+    vx_df_image format;
+    vx_parameter output_param;
+    vx_uint32 height, width;
+    output_param = vxGetParameterByIndex(node, 3);
+    STATUS_ERROR_CHECK(vxQueryParameter(output_param, VX_PARAMETER_ATTRIBUTE_REF, &output, sizeof(vx_image)));
+    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
+    STATUS_ERROR_CHECK(vxQueryImage(output, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[3], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+    vxReleaseImage(&input);
+    vxReleaseImage(&output);
+    vxReleaseParameter(&output_param);
+    vxReleaseParameter(&input_param);
+    return status;
+}
+
+static vx_status VX_CALLBACK processResizetensor(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    RppStatus rpp_status = RPP_SUCCESS;
+    vx_status return_status = VX_SUCCESS;
+    ResizetensorLocalData *data = NULL;
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    vx_df_image df_image = VX_DF_IMAGE_VIRT;
+    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+    vx_int32 output_format_toggle = 0;
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+    {
+#if ENABLE_OPENCL
+        refreshResizetensor(node, parameters, num, data);
+        if (df_image == VX_DF_IMAGE_U8)
+        {
+            rpp_status = rppi_resize_u8_pln1_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle);
+        }
+        else if (df_image == VX_DF_IMAGE_RGB)
+        {
+            rpp_status = rppi_resize_u8_pkd3_batchPD_gpu((void *)data->cl_pSrc, data->srcDimensions, data->maxSrcDimensions, (void *)data->cl_pDst, data->dstDimensions, data->maxDstDimensions, output_format_toggle, data->nbatchSize, data->rppHandle);
+        }
+        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#elif ENABLE_HIP
+        refreshResizetensor(node, parameters, num, data);
+        rpp_status = rppt_resize_gpu(data->hip_pSrc, data->srcDescPtr, data->hip_pDst, data->dstDescPtr, data->hip_dstImgSize, data->interpolation_type, data->hip_roiTensorPtrSrc, data->roiType, data->rppHandle);
+        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#endif
+    }
+    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+    {
+        refreshResizetensor(node, parameters, num, data);
+        rpp_status = rppt_resize_host(data->pSrc, data->srcDescPtr, data->pDst, data->dstDescPtr, data->dstImgSize, data->interpolation_type, data->roiTensorPtrSrc, data->roiType, data->rppHandle);
+        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+    }
+    return return_status;
+}
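Two things worth noting about processResizetensor above. First, only the HIP and host paths reach the RPP tensor API (rppt_resize_gpu / rppt_resize_host); the OpenCL path falls back to the older rppi batchPD resize calls, whose argument lists carry no interpolation parameter, so interpolation_type has no effect on that backend. Second, the int-to-enum mapping happens once, in the initialize callback below; a standalone equivalent for reference — the helper name and the defensive default are my additions (the patch's switch has no default case, leaving the zero value from the earlier memset for out-of-range codes):

    // Illustrative helper mirroring the interpolation switch in
    // initializeResizetensor; not part of the patch.
    RpptInterpolationType toRpptInterpolation(int code)
    {
        switch (code)
        {
        case 0: return RpptInterpolationType::NEAREST_NEIGHBOR;
        case 1: return RpptInterpolationType::BILINEAR;
        case 2: return RpptInterpolationType::BICUBIC;
        case 3: return RpptInterpolationType::LANCZOS;
        case 4: return RpptInterpolationType::TRIANGULAR;
        case 5: return RpptInterpolationType::GAUSSIAN;
        default: return RpptInterpolationType::BILINEAR; // defensive fallback, not in the patch
        }
    }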
+static vx_status VX_CALLBACK initializeResizetensor(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    ResizetensorLocalData *data = new ResizetensorLocalData;
+    memset(data, 0, sizeof(*data));
+#if ENABLE_OPENCL
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_OPENCL_COMMAND_QUEUE, &data->handle.cmdq, sizeof(data->handle.cmdq)));
+#elif ENABLE_HIP
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_ATTRIBUTE_AMD_HIP_STREAM, &data->handle.hipstream, sizeof(data->handle.hipstream)));
+#endif
+    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &data->nbatchSize));
+    int interpolation_type;
+    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &interpolation_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    data->srcDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize);
+    data->dstDimensions = (RppiSize *)malloc(sizeof(RppiSize) * data->nbatchSize);
+    data->srcBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
+    data->srcBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
+    data->dstBatch_width = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
+    data->dstBatch_height = (Rpp32u *)malloc(sizeof(Rpp32u) * data->nbatchSize);
+    data->dstImgSize = (RpptImagePatch *)malloc(sizeof(RpptImagePatch) * data->nbatchSize);
+
+    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_HEIGHT, &data->maxSrcDimensions.height, sizeof(data->maxSrcDimensions.height)));
+    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_WIDTH, &data->maxSrcDimensions.width, sizeof(data->maxSrcDimensions.width)));
+    data->maxSrcDimensions.height = data->maxSrcDimensions.height / data->nbatchSize;
+    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_HEIGHT, &data->maxDstDimensions.height, sizeof(data->maxDstDimensions.height)));
+    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[3], VX_IMAGE_WIDTH, &data->maxDstDimensions.width, sizeof(data->maxDstDimensions.width)));
+    data->maxDstDimensions.height = data->maxDstDimensions.height / data->nbatchSize;
+
+    // Check whether the input is an RGB or single-channel U8 image
+    vx_df_image df_image = VX_DF_IMAGE_VIRT;
+    STATUS_ERROR_CHECK(vxQueryImage((vx_image)parameters[0], VX_IMAGE_ATTRIBUTE_FORMAT, &df_image, sizeof(df_image)));
+    uint ip_channel = (df_image == VX_DF_IMAGE_RGB) ? 3 : 1;
+
+    // Set interpolation type
+    switch (interpolation_type)
+    {
+    case 0:
+        data->interpolation_type = RpptInterpolationType::NEAREST_NEIGHBOR;
+        break;
+    case 1:
+        data->interpolation_type = RpptInterpolationType::BILINEAR;
+        break;
+    case 2:
+        data->interpolation_type = RpptInterpolationType::BICUBIC;
+        break;
+    case 3:
+        data->interpolation_type = RpptInterpolationType::LANCZOS;
+        break;
+    case 4:
+        data->interpolation_type = RpptInterpolationType::TRIANGULAR;
+        break;
+    case 5:
+        data->interpolation_type = RpptInterpolationType::GAUSSIAN;
+        break;
+    }
+
+    // Initialize tensor config parameters
+    data->srcDescPtr = &data->srcDesc;
+    data->dstDescPtr = &data->dstDesc;
+
+    data->srcDescPtr->dataType = RpptDataType::U8;
+    data->dstDescPtr->dataType = RpptDataType::U8;
+
+    // Set numDims, offset, n/c/h/w values for src/dst
+    data->srcDescPtr->numDims = 4;
+    data->dstDescPtr->numDims = 4;
+    data->srcDescPtr->offsetInBytes = 0;
+    data->dstDescPtr->offsetInBytes = 0;
+    data->srcDescPtr->n = data->nbatchSize;
+    data->srcDescPtr->h = data->maxSrcDimensions.height;
+    data->srcDescPtr->w = data->maxSrcDimensions.width;
+    data->srcDescPtr->c = ip_channel;
+    data->dstDescPtr->n = data->nbatchSize;
+    data->dstDescPtr->h = data->maxDstDimensions.height;
+    data->dstDescPtr->w = data->maxDstDimensions.width;
+    data->dstDescPtr->c = ip_channel;
+    // Set layout and n/c/h/w strides for src/dst
+    if (df_image == VX_DF_IMAGE_U8) // For PLN1 images
+    {
+        data->srcDescPtr->layout = RpptLayout::NCHW;
+        data->dstDescPtr->layout = RpptLayout::NCHW;
+        data->srcDescPtr->strides.nStride = ip_channel * data->srcDescPtr->w * data->srcDescPtr->h;
+        data->srcDescPtr->strides.cStride = data->srcDescPtr->w * data->srcDescPtr->h;
+        data->srcDescPtr->strides.hStride = data->srcDescPtr->w;
+        data->srcDescPtr->strides.wStride = 1;
+        data->dstDescPtr->strides.nStride = ip_channel * data->dstDescPtr->w * data->dstDescPtr->h;
+        data->dstDescPtr->strides.cStride = data->dstDescPtr->w * data->dstDescPtr->h;
+        data->dstDescPtr->strides.hStride = data->dstDescPtr->w;
+        data->dstDescPtr->strides.wStride = 1;
+    }
+    else // For packed RGB (NHWC) images
+    {
+        data->srcDescPtr->layout = RpptLayout::NHWC;
+        data->dstDescPtr->layout = RpptLayout::NHWC;
+        data->srcDescPtr->strides.nStride = ip_channel * data->srcDescPtr->w * data->srcDescPtr->h;
+        data->srcDescPtr->strides.hStride = ip_channel * data->srcDescPtr->w;
+        data->srcDescPtr->strides.wStride = ip_channel;
+        data->srcDescPtr->strides.cStride = 1;
+        data->dstDescPtr->strides.nStride = ip_channel * data->dstDescPtr->w * data->dstDescPtr->h;
+        data->dstDescPtr->strides.hStride = ip_channel * data->dstDescPtr->w;
+        data->dstDescPtr->strides.wStride = ip_channel;
+        data->dstDescPtr->strides.cStride = 1;
+    }
+
+    // Initialize the ROI tensor for src
+    data->roiTensorPtrSrc = (RpptROI *)calloc(data->nbatchSize, sizeof(RpptROI));
+
+    // Set the ROI tensor type for src
+    data->roiType = RpptRoiType::XYWH;
+#if ENABLE_HIP
+    hipMalloc(&data->hip_dstImgSize, data->nbatchSize * sizeof(RpptImagePatch));
+    hipMalloc(&data->hip_roiTensorPtrSrc, data->nbatchSize * sizeof(RpptROI));
+#endif
+    refreshResizetensor(node, parameters, num, data);
+#if ENABLE_OPENCL
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.cmdq, data->nbatchSize);
+#elif ENABLE_HIP
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+        rppCreateWithStreamAndBatchSize(&data->rppHandle, data->handle.hipstream, data->nbatchSize);
+#endif
+    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+        rppCreateWithBatchSize(&data->rppHandle, data->nbatchSize);
+
+    STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    return VX_SUCCESS;
+}
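The stride block above is what actually defines the memory layout for RPP. As a worked example with assumed numbers (not from the patch): a 224x224 RGB batch in NHWC gives nStride = 3*224*224 = 150528, hStride = 3*224 = 672, wStride = 3, cStride = 1, so an element (n, h, w, c) lives at the flat offset computed by this illustrative helper:

    // Sketch: flat element offset under the strides set in initializeResizetensor.
    inline size_t nhwcOffset(size_t n, size_t h, size_t w, size_t c, const RpptDescPtr desc)
    {
        return n * desc->strides.nStride + h * desc->strides.hStride +
               w * desc->strides.wStride + c * desc->strides.cStride;
    }
    // e.g. (n=1, h=0, w=10, c=2) -> 150528 + 0 + 30 + 2 = 150560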
+static vx_status VX_CALLBACK uninitializeResizetensor(vx_node node, const vx_reference *parameters, vx_uint32 num)
+{
+    ResizetensorLocalData *data;
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+#if ENABLE_HIP
+    hipFree(data->hip_dstImgSize);
+    hipFree(data->hip_roiTensorPtrSrc);
+#endif
+#if ENABLE_OPENCL || ENABLE_HIP
+    if (data->device_type == AGO_TARGET_AFFINITY_GPU)
+        rppDestroyGPU(data->rppHandle);
+#endif
+    if (data->device_type == AGO_TARGET_AFFINITY_CPU)
+        rppDestroyHost(data->rppHandle);
+    free(data->srcDimensions);
+    free(data->dstDimensions);
+    free(data->srcBatch_width);
+    free(data->srcBatch_height);
+    free(data->dstBatch_width);
+    free(data->dstBatch_height);
+    free(data->roiTensorPtrSrc);
+    free(data->dstImgSize);
+    delete (data);
+    return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO: currently the node sets the same affinity as the context. This needs to change when we have hybrid modes in the same graph.
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+                                                  vx_bool use_opencl_1_2,              // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+                                                  vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+)
+{
+    vx_context context = vxGetContext((vx_reference)graph);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+    else
+        supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+    // hardcode the affinity to CPU for the OpenCL backend to avoid a VerifyGraph failure, since there is no codegen callback for amd_rpp nodes
+#if ENABLE_OPENCL
+    supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+#endif
+    return VX_SUCCESS;
+}
+
+vx_status Resizetensor_Register(vx_context context)
+{
+    vx_status status = VX_SUCCESS;
+    // Add kernel to the context with callbacks
+    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Resizetensor",
+                                       VX_KERNEL_RPP_RESIZETENSOR,
+                                       processResizetensor,
+                                       9,
+                                       validateResizetensor,
+                                       initializeResizetensor,
+                                       uninitializeResizetensor);
+    ERROR_CHECK_OBJECT(kernel);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_OPENCL || ENABLE_HIP
+    // enable GPU buffer access since the kernel_f callback uses GPU buffers instead of host accessible buffers
+    vx_bool enableBufferAccess = vx_true_e;
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+    vx_bool enableBufferAccess = vx_false_e;
+#endif
+    amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+    if (kernel)
+    {
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+    }
+    if (status != VX_SUCCESS)
+    {
+    exit:
+        vxRemoveKernel(kernel);
+        return VX_FAILURE;
+    }
+    return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp
index 566ff8a376..92aea1c61b 100644
--- a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp
@@ -124,6 +124,7 @@ vx_status get_kernels_to_publish()
     STATUS_ERROR_CHECK(ADD_KERENEL(Copy_Register));
     STATUS_ERROR_CHECK(ADD_KERENEL(Nop_Register));
     STATUS_ERROR_CHECK(ADD_KERENEL(SequenceRearrange_Register));
+    STATUS_ERROR_CHECK(ADD_KERENEL(Resizetensor_Register));

     return status;
 }
diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
index 65545c67ab..176c877915 100644
--- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
@@ -850,6 +850,31 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ResizebatchPD(vx_graph graph, vx_i
     return node;
 }

+VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_Resizetensor(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_int32 interpolation_type, vx_uint32 nbatchSize)
+{
+    vx_node node = NULL;
+    vx_context context = vxGetContext((vx_reference)graph);
+    if (vxGetStatus((vx_reference)context) == VX_SUCCESS)
+    {
+        vx_uint32 dev_type = getGraphAffinity(graph);
+        vx_scalar DEV_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type);
+        vx_scalar NBATCHSIZE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &nbatchSize);
+        vx_scalar INTERPOLATION_TYPE = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_INT32, &interpolation_type);
+        vx_reference params[] = {
+            (vx_reference)pSrc,
+            (vx_reference)srcImgWidth,
+            (vx_reference)srcImgHeight,
+            (vx_reference)pDst,
+            (vx_reference)dstImgWidth,
+            (vx_reference)dstImgHeight,
+            (vx_reference)INTERPOLATION_TYPE,
+            (vx_reference)NBATCHSIZE,
+            (vx_reference)DEV_TYPE};
+        node = createNode(graph, VX_KERNEL_RPP_RESIZETENSOR, params, 9);
+    }
+    return node;
+}
+
 VX_API_ENTRY vx_node VX_API_CALL vxExtrppNode_ResizeCropbatchPD(vx_graph graph, vx_image pSrc, vx_array srcImgWidth, vx_array srcImgHeight, vx_image pDst, vx_array dstImgWidth, vx_array dstImgHeight, vx_array x1, vx_array y1, vx_array x2, vx_array y2, vx_uint32 nbatchSize)
 {
     vx_node node = NULL;
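One practical note on the wrapper above: like the other vxExtrppNode_* factories in kernel_rpp.cpp, it returns whatever createNode produced without checking it, so callers are expected to validate the node themselves — which is exactly what the rocAL hunk below does. The standard OpenVX check looks like this (sketch, not from the patch):

    vx_node node = vxExtrppNode_Resizetensor(graph, src, srcW, srcH,
                                             dst, dstW, dstH, 1, batchSize);
    vx_status status = vxGetStatus((vx_reference)node);
    if (status != VX_SUCCESS)
    {
        // vxGetStatus on a failed node object reports why creation failed.
        printf("vxExtrppNode_Resizetensor failed: %d\n", status);
    }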
diff --git a/rocAL/rocAL/source/augmentations/geometry_augmentations/node_resize.cpp b/rocAL/rocAL/source/augmentations/geometry_augmentations/node_resize.cpp
index 3cbc4bd598..8d8b419f89 100644
--- a/rocAL/rocAL/source/augmentations/geometry_augmentations/node_resize.cpp
+++ b/rocAL/rocAL/source/augmentations/geometry_augmentations/node_resize.cpp
@@ -48,9 +48,9 @@ void ResizeNode::create_node()
     height_status = vxAddArrayItems(_dst_roi_height, _batch_size, dst_roi_height.data(), sizeof(vx_uint32));
     if(width_status != 0 || height_status != 0) THROW(" vxAddArrayItems failed in the resize (vxExtrppNode_ResizebatchPD) node: "+ TOSTR(width_status) + " "+ TOSTR(height_status))
+    // _node = vxExtrppNode_ResizebatchPD(_graph->get(), _inputs[0]->handle(), _src_roi_width, _src_roi_height, _outputs[0]->handle(), _dst_roi_width, _dst_roi_height, _batch_size);
-    _node = vxExtrppNode_ResizebatchPD(_graph->get(), _inputs[0]->handle(), _src_roi_width, _src_roi_height, _outputs[0]->handle(), _dst_roi_width, _dst_roi_height, _batch_size);
-
+    _node = vxExtrppNode_Resizetensor(_graph->get(), _inputs[0]->handle(), _src_roi_width, _src_roi_height, _outputs[0]->handle(), _dst_roi_width, _dst_roi_height, _interpolation_type, _batch_size);
     vx_status status;
     if((status = vxGetStatus((vx_reference)_node)) != VX_SUCCESS) THROW("Adding the resize (vxExtrppNode_ResizebatchPD) node failed: "+ TOSTR(status))
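The hunk above swaps ResizeNode over to the tensor path and forwards an _interpolation_type member, but the patch does not show where that member is declared or set (and the surrounding THROW messages, untouched context here, still name vxExtrppNode_ResizebatchPD). A hypothetical shape for the missing plumbing — the class layout and member names below are assumptions, not code from this patch:

    // Sketch only: how ResizeNode might carry the interpolation selection.
    class ResizeNode : public Node
    {
    public:
        void set_interpolation_type(int type) { _interpolation_type = type; }
    private:
        vx_int32 _interpolation_type = 1; // assumed default: bilinear (code 1)
    };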
diff --git a/rocAL/rocAL_pybind/amd/rocal/decoders.py b/rocAL/rocAL_pybind/amd/rocal/decoders.py
index 4b213e7317..bb3a02c40d 100644
--- a/rocAL/rocAL_pybind/amd/rocal/decoders.py
+++ b/rocAL/rocAL_pybind/amd/rocal/decoders.py
@@ -191,8 +191,8 @@ def image_random_crop(*inputs, user_feature_key_map=None, path='', file_root='',
                     "shuffle": random_shuffle,
                     "loop": False,
                     "decode_size_policy": types.USER_GIVEN_SIZE_ORIG,
-                    "max_width": 3000,
-                    "max_height": 3000,
+                    "max_width": 2000,
+                    "max_height": 2000,
                     "x_drift_factor": None,
                     "y_drift_factor": None}
     crop_output_image = b.FusedDecoderCropShard(
@@ -202,7 +202,7 @@ def image_random_crop(*inputs, user_feature_key_map=None, path='', file_root='',

 def image_slice(*inputs, file_root='', path='', annotations_file='', shard_id=0, num_shards=1, random_shuffle=False, affine=True, axes=None, axis_names="WH", bytes_per_sample_hint=0, device_memory_padding=16777216,
-               device_memory_padding_jpeg2k=0, host_memory_padding=8388608, random_aspect_ratio=[0.8, 1.25], random_area=[0.08, 1.0], num_attemps=100,
+               device_memory_padding_jpeg2k=0, host_memory_padding=8388608, random_aspect_ratio=[0.8, 1.25], random_area=[0.08, 1.0], num_attempts=100,
                host_memory_padding_jpeg2k=0, hybrid_huffman_threshold=1000000, memory_stats=False, normalized_anchor=True, normalized_shape=True,
                output_type=types.RGB, preserve=False, seed=1, split_stages=False, use_chunk_allocator=False, use_fast_idct=False, device=None):
@@ -222,7 +222,7 @@ def image_slice(*inputs, file_root='', path='', annotations_file='', shard_id=0,
                     'is_output': False,
                     "area_factor": random_area,
                     "aspect_ratio": random_aspect_ratio,
-                    "num_attemps": num_attemps,
+                    "num_attempts": num_attempts,
                     "shuffle": random_shuffle,
                     "loop": False,
                     "decode_size_policy": types.MAX_SIZE,
@@ -241,7 +241,7 @@ def image_slice(*inputs, file_root='', path='', annotations_file='', shard_id=0,
                     'is_output': False,
                     "area_factor": random_area,
                     "aspect_ratio": random_aspect_ratio,
-                    "num_attemps": num_attemps,
+                    "num_attempts": num_attempts,
                     "shuffle": random_shuffle,
                     "loop": False,
                     "decode_size_policy": types.MAX_SIZE,
@@ -260,7 +260,7 @@ def image_slice(*inputs, file_root='', path='', annotations_file='', shard_id=0,
                     'is_output': False,
                     "area_factor": random_area,
                     "aspect_ratio": random_aspect_ratio,
-                    "num_attemps": num_attemps,
+                    "num_attempts": num_attempts,
                     "shuffle": random_shuffle,
                     "loop": False,
                     "decode_size_policy": types.MAX_SIZE,
@@ -279,12 +279,12 @@ def image_slice(*inputs, file_root='', path='', annotations_file='', shard_id=0,
                     'is_output': False,
                     "area_factor": random_area,
                     "aspect_ratio": random_aspect_ratio,
-                    "num_attemps": num_attemps,
+                    "num_attempts": num_attempts,
                     "shuffle": random_shuffle,
                     "loop": False,
                     "decode_size_policy": types.USER_GIVEN_SIZE_ORIG,
-                    "max_width": 3000,
-                    "max_height": 3000,
+                    "max_width": 2000,
+                    "max_height": 2000,
                     "x_drift_factor": None,
                     "y_drift_factor": None}
     image_decoder_slice = b.FusedDecoderCropShard(