From ba6ff14530bde23cab99865499cf8052db55091e Mon Sep 17 00:00:00 2001 From: Swetha B S Date: Thu, 14 Mar 2024 20:39:33 +0000 Subject: [PATCH 01/16] Merging swbs_m2/audio/pr5 into swbs_m3/audio/pr6 --- amd_openvx_extensions/amd_rpp/CMakeLists.txt | 2 +- .../amd_rpp/include/internal_publishKernels.h | 4 +-- .../amd_rpp/include/kernels_rpp.h | 6 ++-- .../amd_rpp/include/vx_ext_rpp.h | 18 +++++----- .../source/internal_publishKernels.cpp | 2 +- .../amd_rpp/source/kernel_rpp.cpp | 36 +++++++++---------- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/amd_openvx_extensions/amd_rpp/CMakeLists.txt b/amd_openvx_extensions/amd_rpp/CMakeLists.txt index d4e255b99..21101b831 100644 --- a/amd_openvx_extensions/amd_rpp/CMakeLists.txt +++ b/amd_openvx_extensions/amd_rpp/CMakeLists.txt @@ -133,6 +133,7 @@ list(APPEND SOURCES source/tensor/Copy.cpp source/tensor/Crop.cpp source/tensor/CropMirrorNormalize.cpp + source/tensor/Downmix.cpp source/tensor/Exposure.cpp source/tensor/FishEye.cpp source/tensor/Flip.cpp @@ -160,7 +161,6 @@ list(APPEND SOURCES source/tensor/WarpAffine.cpp source/tensor/SequenceRearrange.cpp source/tensor/PreemphasisFilter.cpp - source/tensor/Downmix.cpp source/kernel_rpp.cpp source/internal_publishKernels.cpp ) diff --git a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h index f982e9cf7..0376dfbda 100644 --- a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h +++ b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h @@ -156,9 +156,9 @@ vx_status Vignette_Register(vx_context); vx_status WarpAffine_Register(vx_context); vx_status SequenceRearrange_Register(vx_context); vx_status PreemphasisFilter_Register(vx_context); -vx_status Downmix_Register(vx_context); vx_status NonSilentRegion_Register(vx_context); vx_status Spectrogram_Register(vx_context); +vx_status Downmix_Register(vx_context); // kernel names #define 
VX_KERNEL_RPP_NOPBATCHPD_NAME "org.rpp.NopbatchPD" @@ -284,8 +284,8 @@ vx_status Spectrogram_Register(vx_context); #define VX_KERNEL_RPP_RESIZE_NAME "org.rpp.Resize" #define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME "org.rpp.SequenceRearrange" #define VX_KERNEL_RPP_PREEMPHASISFILTER_NAME "org.rpp.PreemphasisFilter" -#define VX_KERNEL_RPP_DOWNMIX_NAME "org.rpp.Downmix" #define VX_KERNEL_RPP_NONSILENTREGION_NAME "org.rpp.NonSilentRegion" #define VX_KERNEL_RPP_SPECTROGRAM_NAME "org.rpp.Spectrogram" +#define VX_KERNEL_RPP_DOWNMIX_NAME "org.rpp.Downmix" #endif //_AMDVX_EXT__PUBLISH_KERNELS_H_ diff --git a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h index 49c6770ba..e440aa174 100644 --- a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h @@ -149,9 +149,9 @@ extern "C" VX_KERNEL_RPP_VIGNETTE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x72, VX_KERNEL_RPP_WARPAFFINE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x73, VX_KERNEL_RPP_PREEMPHASISFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x74, - VX_KERNEL_RPP_DOWNMIX = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75, - VX_KERNEL_RPP_NONSILENTREGION = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x76, - VX_KERNEL_RPP_SPECTROGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x77, + VX_KERNEL_RPP_NONSILENTREGION = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75, + VX_KERNEL_RPP_SPECTROGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x76, + VX_KERNEL_RPP_DOWNMIX = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x77 }; #ifdef __cplusplus diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h index 09c0e240a..490c1ad4e 100644 --- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h @@ -1876,15 +1876,6 @@ extern "C" * \return A node reference \ref vx_node. 
Any possible errors preventing a successful creation should be checked using \ref vxGetStatus. */ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppPreemphasisFilter(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_tensor pDstRoi, vx_array preemphCoeff, vx_scalar borderType); - /*! \brief [Graph] Applies downmixing to the input tensor. - * \ingroup group_amd_rpp - * \param [in] graph The handle to the graph. - * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data. - * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data. - * \param [in] pDstRoi The output tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format. - * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus. - */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor srcRoi); /*! \brief [Graph] Performs leading and trailing silence detection to the input tensor. * \ingroup group_amd_rpp @@ -1920,6 +1911,15 @@ extern "C" */ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcLength, vx_tensor pDst, vx_tensor pDstDims, vx_array windowFn, vx_scalar centerWindow, vx_scalar reflectPadding, vx_scalar spectrogramLayout, vx_scalar power, vx_scalar nfft, vx_scalar windowLength, vx_scalar windowStep); + /*! \brief [Graph] Applies downmixing to the input tensor. + * \ingroup group_amd_rpp + * \param [in] graph The handle to the graph. + * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data. + * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data. 
+ * \param [in] pDstRoi The output tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format. + * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus. + */ + SHARED_PUBLIC vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor srcRoi); #ifdef __cplusplus } diff --git a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp index de45d734c..779190c21 100644 --- a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp +++ b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp @@ -161,9 +161,9 @@ vx_status get_kernels_to_publish() STATUS_ERROR_CHECK(ADD_KERNEL(Vignette_Register)); STATUS_ERROR_CHECK(ADD_KERNEL(WarpAffine_Register)); STATUS_ERROR_CHECK(ADD_KERNEL(PreemphasisFilter_Register)); - STATUS_ERROR_CHECK(ADD_KERNEL(Downmix_Register)); STATUS_ERROR_CHECK(ADD_KERNEL(NonSilentRegion_Register)); STATUS_ERROR_CHECK(ADD_KERNEL(Spectrogram_Register)); + STATUS_ERROR_CHECK(ADD_KERNEL(Downmix_Register)); return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp index afb65e74f..467ed1e15 100644 --- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp +++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp @@ -2561,24 +2561,6 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppPreemphasisFilter(vx_graph graph, vx_te return node; } -VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor srcRoi) -{ - vx_node node = NULL; - vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar devType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); - vx_reference 
params[] = { - (vx_reference)pSrc, - (vx_reference)pDst, - (vx_reference)srcRoi, - (vx_reference)devType}; - node = createNode(graph, VX_KERNEL_RPP_DOWNMIX, params, 4); - } - return node; -} - VX_API_ENTRY vx_node VX_API_CALL vxExtRppNonSilentRegion(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcLength, vx_tensor pDst1, vx_tensor pDst2, vx_scalar cutOffDB, vx_scalar referencePower, vx_scalar windowLength, vx_scalar resetInterval) { vx_node node = NULL; vx_context context = vxGetContext((vx_reference)graph); @@ -2626,6 +2608,24 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor p return node; } +VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor srcRoi) +{ + vx_node node = NULL; + vx_context context = vxGetContext((vx_reference)graph); + if (vxGetStatus((vx_reference)context) == VX_SUCCESS) + { + vx_uint32 dev_type = getGraphAffinity(graph); + vx_scalar devType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); + vx_reference params[] = { + (vx_reference)pSrc, + (vx_reference)pDst, + (vx_reference)srcRoi, + (vx_reference)devType}; + node = createNode(graph, VX_KERNEL_RPP_DOWNMIX, params, 4); + } + return node; +} + RpptDataType getRpptDataType(vx_enum vxDataType) { switch(vxDataType) { case vx_type_e::VX_TYPE_FLOAT32: From e94c53fe73bbea937c5348f6909a62c5b316ab5a Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Fri, 15 Mar 2024 15:05:12 +0000 Subject: [PATCH 02/16] Fixing build issues --- .../amd_rpp/source/kernel_rpp.cpp | 6 +- .../amd_rpp/source/tensor/Downmix.cpp | 59 ++++++++----------- 2 files changed, 26 insertions(+), 39 deletions(-) diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp index 467ed1e15..9495f78c8 100644 --- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp +++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp @@ -2608,12 +2608,10 @@ VX_API_ENTRY vx_node 
VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor p return node; } -VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor srcRoi) -{ +VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor srcRoi) { vx_node node = NULL; vx_context context = vxGetContext((vx_reference)graph); - if (vxGetStatus((vx_reference)context) == VX_SUCCESS) - { + if (vxGetStatus((vx_reference)context) == VX_SUCCESS) { vx_uint32 dev_type = getGraphAffinity(graph); vx_scalar devType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); vx_reference params[] = { diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp index 87a36852f..a4d9c27c4 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp @@ -23,14 +23,12 @@ THE SOFTWARE. #include "internal_publishKernels.h" #include "vx_ext_amd.h" -struct DownmixLocalData -{ +struct DownmixLocalData { vxRppHandle *handle; Rpp32u device_type; RppPtr_t pSrc; RppPtr_t pDst; - vx_int32 *pSamples; - vx_int32 *pChannels; + vx_int32 *srcDims; RpptDescPtr pSrcDesc; RpptDescPtr pDstDesc; RpptDesc srcDesc; @@ -42,22 +40,20 @@ struct DownmixLocalData static vx_status VX_CALLBACK refreshDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num, DownmixLocalData *data) { vx_status status = VX_SUCCESS; void *roi_tensor_ptr_src; - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { + if (data->device_type == AGO_TARGET_AFFINITY_GPU) { #if ENABLE_HIP return VX_ERROR_NOT_IMPLEMENTED; } #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); + if (data->device_type == AGO_TARGET_AFFINITY_CPU) { + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], 
VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); } RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); - for(int n = 0; n < data->inputTensorDims[0] ; n++) { - data->pSamples[n] = src_roi[n].xywhROI.xy.x; - data->pChannels[n] = src_roi[n].xywhROI.xy.y; + for (int n = 0; n < data->inputTensorDims[0]; n++) { + data->srcDims[n * 2] = src_roi[n].xywhROI.roiWidth; + data->srcDims[n * 2 + 1] = src_roi[n].xywhROI.roiHeight; } return status; } @@ -72,16 +68,16 @@ static vx_status VX_CALLBACK validateDownmix(vx_node node, const vx_reference pa // Check for input parameters size_t num_tensor_dims; STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); - if(num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Downmix: tensor: #0 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims); + if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Downmix: tensor: #0 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims); // Check for output parameters STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); - if(num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Downmix: tensor: #1 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims); + if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Downmix: tensor: #1 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims); vx_uint8 tensor_fixed_point_position; size_t tensor_dims[RPP_MAX_TENSOR_DIMS]; vx_enum tensor_datatype; - + 
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position))); @@ -97,16 +93,14 @@ static vx_status VX_CALLBACK processDownmix(vx_node node, const vx_reference *pa vx_status return_status = VX_SUCCESS; DownmixLocalData *data = NULL; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) - { + if (data->device_type == AGO_TARGET_AFFINITY_GPU) { #if ENABLE_HIP return_status = VX_ERROR_NOT_IMPLEMENTED; } #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) - { + if (data->device_type == AGO_TARGET_AFFINITY_CPU) { refreshDownmix(node, parameters, num, data); - rpp_status = rppt_down_mixing_host((float *)data->pSrc, data->pSrcDesc, (float *)data->pDst, data->pDstDesc, data->pSamples, data->pChannels, false, data->handle->rppHandle); + rpp_status = rppt_down_mixing_host((float *)data->pSrc, data->pSrcDesc, (float *)data->pDst, data->pDstDesc, (Rpp32s *)data->srcDims, false, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; } return return_status; @@ -133,13 +127,12 @@ static vx_status VX_CALLBACK initializeDownmix(vx_node node, const vx_reference data->pDstDesc = new RpptDesc; STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims)); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1],VX_TENSOR_DATA_TYPE, &output_tensor_datatype, sizeof(output_tensor_datatype))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &output_tensor_datatype, sizeof(output_tensor_datatype))); data->pDstDesc->dataType = getRpptDataType(output_tensor_datatype); data->pDstDesc->offsetInBytes = 0; fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims); - data->pSamples = new vx_int32[data->pSrcDesc->n]; - data->pChannels = new vx_int32[data->pSrcDesc->n]; + data->srcDims = new vx_int32[data->pSrcDesc->n * 2]; refreshDownmix(node, parameters, num, data); STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->device_type)); @@ -152,17 +145,16 @@ static vx_status VX_CALLBACK uninitializeDownmix(vx_node node, const vx_referenc DownmixLocalData *data; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); - delete(data->pSamples); - delete(data->pChannels); - delete(data); + delete (data->srcDims); + delete (data); return VX_SUCCESS; } //! \brief The kernel target support callback. // TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hubrid modes in the same graph static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, - vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 - vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) ) { vx_context context = vxGetContext((vx_reference)graph); AgoTargetAffinityInfo affinity; @@ -172,7 +164,7 @@ static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, else supported_target_affinity = AGO_TARGET_AFFINITY_CPU; -// hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes + // hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes return VX_SUCCESS; } @@ -196,12 +188,11 @@ vx_status Downmix_Register(vx_context context) { if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif amd_kernel_query_target_support_f query_target_support_f = query_target_support; - if (kernel) - { + if (kernel) { STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); @@ -210,8 +201,7 @@ vx_status Downmix_Register(vx_context context) { PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); } - if (status != VX_SUCCESS) - { + if (status != VX_SUCCESS) { exit: vxRemoveKernel(kernel); return VX_FAILURE; @@ -219,4 +209,3 @@ vx_status Downmix_Register(vx_context context) { return status; } - From 1a51feea291bce9fb3999952b37fc2a8e72126dd Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Fri, 22 Mar 2024 14:05:41 +0000 Subject: [PATCH 03/16] Add openvx extensions for to_decibels augmentations --- amd_openvx_extensions/amd_rpp/CMakeLists.txt | 1 + .../amd_rpp/include/internal_publishKernels.h | 2 + .../amd_rpp/include/kernels_rpp.h | 3 +- .../amd_rpp/include/vx_ext_rpp.h | 11 + .../source/internal_publishKernels.cpp | 1 + .../amd_rpp/source/kernel_rpp.cpp | 20 ++ .../amd_rpp/source/tensor/ToDecibels.cpp | 230 ++++++++++++++++++ 7 files changed, 267 insertions(+), 1 deletion(-) create mode 100644 amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp diff --git a/amd_openvx_extensions/amd_rpp/CMakeLists.txt b/amd_openvx_extensions/amd_rpp/CMakeLists.txt index 70332dc3b..884f890cc 100644 --- a/amd_openvx_extensions/amd_rpp/CMakeLists.txt +++ b/amd_openvx_extensions/amd_rpp/CMakeLists.txt @@ -156,6 +156,7 @@ list(APPEND SOURCES source/tensor/SequenceRearrange.cpp source/tensor/Snow.cpp source/tensor/Spectrogram.cpp + source/tensor/ToDecibels.cpp source/tensor/Vignette.cpp source/tensor/WarpAffine.cpp source/tensor/SequenceRearrange.cpp diff --git a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h index b639fd573..445495aee 100644 --- a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h +++ 
b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h @@ -158,6 +158,7 @@ vx_status SequenceRearrange_Register(vx_context); vx_status PreemphasisFilter_Register(vx_context); vx_status Spectrogram_Register(vx_context); vx_status Downmix_Register(vx_context); +vx_status ToDecibels_Register(vx_context); // kernel names #define VX_KERNEL_RPP_NOPBATCHPD_NAME "org.rpp.NopbatchPD" @@ -285,5 +286,6 @@ vx_status Downmix_Register(vx_context); #define VX_KERNEL_RPP_PREEMPHASISFILTER_NAME "org.rpp.PreemphasisFilter" #define VX_KERNEL_RPP_SPECTROGRAM_NAME "org.rpp.Spectrogram" #define VX_KERNEL_RPP_DOWNMIX_NAME "org.rpp.Downmix" +#define VX_KERNEL_RPP_TODECIBELS_NAME "org.rpp.ToDecibels" #endif //_AMDVX_EXT__PUBLISH_KERNELS_H_ diff --git a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h index d73b9ec36..5a367a3f1 100644 --- a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h @@ -150,7 +150,8 @@ extern "C" VX_KERNEL_RPP_WARPAFFINE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x73, VX_KERNEL_RPP_PREEMPHASISFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x74, VX_KERNEL_RPP_SPECTROGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75, - VX_KERNEL_RPP_DOWNMIX = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x76 + VX_KERNEL_RPP_DOWNMIX = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x76, + VX_KERNEL_RPP_TODECIBELS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x77, }; #ifdef __cplusplus diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h index f83ddd9db..b3d1d270f 100644 --- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h @@ -1908,6 +1908,17 @@ extern "C" */ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor srcRoi); + /*! 
\brief [Graph] Applies to_decibels augmentation to the input tensor. + * \ingroup group_amd_rpp + * \param [in] graph The handle to the graph. + * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data. + * \param [in] pSrcDims The input tensor of batch size in unsigned int containing the roi values for the input tensor in xywh/ltrb format. + * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data. + * \param [in] pDstDims The input tensor of batch size in unsigned int containing the roi values for the output tensor in xywh/ltrb format. + * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus. + */ + SHARED_PUBLIC vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcDims, vx_tensor pDst, vx_tensor pDstDims, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude); + #ifdef __cplusplus } #endif diff --git a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp index e25a6066b..db238b3e8 100644 --- a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp +++ b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp @@ -163,6 +163,7 @@ vx_status get_kernels_to_publish() STATUS_ERROR_CHECK(ADD_KERNEL(PreemphasisFilter_Register)); STATUS_ERROR_CHECK(ADD_KERNEL(Spectrogram_Register)); STATUS_ERROR_CHECK(ADD_KERNEL(Downmix_Register)); + STATUS_ERROR_CHECK(ADD_KERNEL(ToDecibels_Register)); return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp index beb17bf8f..572a4b6d6 100644 --- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp +++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp @@ -2601,6 +2601,26 
@@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, return node; } +VX_API_ENTRY vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcDims, vx_tensor pDst, vx_tensor pDstDims, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude) { + vx_node node = NULL; + vx_context context = vxGetContext((vx_reference)graph); + if (vxGetStatus((vx_reference)context) == VX_SUCCESS) { + vx_uint32 devType = getGraphAffinity(graph); + vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType); + vx_reference params[] = { + (vx_reference)pSrc, + (vx_reference)pSrcDims, + (vx_reference)pDst, + (vx_reference)pDstDims, + (vx_reference)cutOffDB, + (vx_reference)multiplier, + (vx_reference)referenceMagnitude, + (vx_reference)deviceType}; + node = createNode(graph, VX_KERNEL_RPP_TODECIBELS, params, 8); + } + return node; +} + RpptDataType getRpptDataType(vx_enum vxDataType) { switch(vxDataType) { case vx_type_e::VX_TYPE_FLOAT32: diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp new file mode 100644 index 000000000..f19c0c07a --- /dev/null +++ b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp @@ -0,0 +1,230 @@ +/* +Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +#include "internal_publishKernels.h" + +struct ToDecibelsLocalData { + vxRppHandle *handle; + Rpp32u deviceType; + RppPtr_t pSrc; + RppPtr_t pDst; + Rpp32f cutOffDB; + Rpp32f multiplier; + Rpp32f referenceMagnitude; + RpptDescPtr pSrcDesc; + RpptDescPtr pDstDesc; + RpptImagePatch *pSrcDims; + size_t inputTensorDims[RPP_MAX_TENSOR_DIMS]; + size_t outputTensorDims[RPP_MAX_TENSOR_DIMS]; +}; + +void copy_src_dims_and_update_dst_roi(ToDecibelsLocalData *data, RpptROI *src_roi, RpptROI *dst_roi) { + memcpy(dst_roi, src_roi, data->pSrcDesc->n * sizeof(RpptROI)); + for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { + data->pSrcDims[i].width = src_roi[i].xywhROI.xy.x; + data->pSrcDims[i].height = src_roi[i].xywhROI.xy.y; + } +} + +static vx_status VX_CALLBACK refreshToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num, ToDecibelsLocalData *data) { + vx_status status = VX_SUCCESS; + void *roi_tensor_ptr_src, *roi_tensor_ptr_dst; + if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_OPENCL + return VX_ERROR_NOT_IMPLEMENTED; +#elif ENABLE_HIP + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], 
VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr_dst, sizeof(roi_tensor_ptr_dst))); +#endif + } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_dst, sizeof(roi_tensor_ptr_dst))); + } + RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); + RpptROI *dst_roi = reinterpret_cast(roi_tensor_ptr_dst); + copy_src_dims_and_update_dst_roi(data, src_roi, dst_roi); + return status; +} + +static vx_status VX_CALLBACK validateToDecibels(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_FLOAT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_FLOAT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_FLOAT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); + + // Check for input parameters + size_t num_tensor_dims; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, 
sizeof(num_tensor_dims))); + if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: MelFilterBank: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims); + + // Check for output parameters + vx_uint8 tensor_fixed_point_position; + size_t tensor_dims[RPP_MAX_TENSOR_DIMS]; + vx_enum tensor_datatype; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); + if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: MelFilterBank: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims); + + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position))); + return status; +} + +static vx_status VX_CALLBACK processToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num) { + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + ToDecibelsLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + refreshToDecibels(node, parameters, num, data); + if (data->deviceType == 
AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_OPENCL + return_status = VX_ERROR_NOT_IMPLEMENTED; +#elif ENABLE_HIP + return_status = VX_ERROR_NOT_IMPLEMENTED; +#endif + } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + rpp_status = rppt_to_decibels_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSrcDims, data->cutOffDB, data->multiplier, data->referenceMagnitude, data->handle->rppHandle); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; + } + return return_status; +} + +static vx_status VX_CALLBACK initializeToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num) { + ToDecibelsLocalData *data = new ToDecibelsLocalData; + memset(data, 0, sizeof(ToDecibelsLocalData)); + + vx_enum input_tensor_datatype, output_tensor_datatype; + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->cutOffDB)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->multiplier)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->referenceMagnitude)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + + // Querying for input tensor + data->pSrcDesc = new RpptDesc; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims)); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_datatype, sizeof(input_tensor_datatype))); + data->pSrcDesc->dataType = getRpptDataType(input_tensor_datatype); + data->pSrcDesc->offsetInBytes = 0; + fillAudioDescriptionPtrFromDims(data->pSrcDesc, data->inputTensorDims); + + // Querying for output tensor + data->pDstDesc = new RpptDesc; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], 
VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims)); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_datatype, sizeof(output_tensor_datatype))); + data->pDstDesc->dataType = getRpptDataType(output_tensor_datatype); + data->pDstDesc->offsetInBytes = 0; + fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims); + + data->pSrcDims = static_cast(calloc(data->pSrcDesc->n, sizeof(RpptImagePatch))); + refreshToDecibels(node, parameters, num, data); + STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; +} + +static vx_status VX_CALLBACK uninitializeToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num) { + ToDecibelsLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + if (data->pSrcDims != nullptr) free(data->pSrcDims); + delete (data->pSrcDesc); + delete (data->pDstDesc); + STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); + delete (data); + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hubrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) { + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + + return VX_SUCCESS; +} + +vx_status ToDecibels_Register(vx_context context) { + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ToDecibels", + VX_KERNEL_RPP_TODECIBELS, + processToDecibels, + 8, + validateToDecibels, + initializeToDecibels, + uninitializeToDecibels); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_HIP + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); +#else + vx_bool enableBufferAccess = vx_false_e; +#endif + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + + if (kernel) { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); + 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) { + exit: + vxRemoveKernel(kernel); + return VX_FAILURE; + } + + return status; +} \ No newline at end of file From 01aba8a166b9431398ff0d74732dbeea02ea03e0 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Mon, 25 Mar 2024 08:49:55 +0000 Subject: [PATCH 04/16] Fixing issues with src ROI --- amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp index f19c0c07a..8059c5afa 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp @@ -40,8 +40,8 @@ struct ToDecibelsLocalData { void copy_src_dims_and_update_dst_roi(ToDecibelsLocalData *data, RpptROI *src_roi, RpptROI *dst_roi) { memcpy(dst_roi, src_roi, data->pSrcDesc->n * sizeof(RpptROI)); for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { - data->pSrcDims[i].width = src_roi[i].xywhROI.xy.x; - data->pSrcDims[i].height = src_roi[i].xywhROI.xy.y; + data->pSrcDims[i].width = src_roi[i].xywhROI.roiWidth; + 
data->pSrcDims[i].height = src_roi[i].xywhROI.roiHeight; } } From 68a5d7ec1c7fdc5e9dd6f532c5351aed2accfb50 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Mon, 25 Mar 2024 12:57:04 +0000 Subject: [PATCH 05/16] Resolving review comments --- .../amd_rpp/include/vx_ext_rpp.h | 17 ++++++---- .../amd_rpp/source/kernel_rpp.cpp | 6 ++-- .../amd_rpp/source/tensor/Downmix.cpp | 33 +++++++++---------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h index cb8635d9f..ee93297b2 100644 --- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h @@ -1899,9 +1899,9 @@ extern "C" /*! \brief [Graph] Applies downmixing to the input tensor. * \ingroup group_amd_rpp * \param [in] graph The handle to the graph. - * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data. - * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data. - * \param [in] pDstRoi The output tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format. + * \param [in] pSrc The input tensor in \ref VX_TYPE_FLOAT32 format data. + * \param [out] pDst The output tensor in \ref VX_TYPE_FLOAT32 format data. + * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input. * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus. */ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor srcRoi); @@ -1909,10 +1909,13 @@ extern "C" /*! \brief [Graph] Applies to_decibels augmentation to the input tensor. * \ingroup group_amd_rpp * \param [in] graph The handle to the graph. 
- * \param [in] pSrc The input tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data. - * \param [in] pSrcDims The input tensor of batch size in unsigned int containing the roi values for the input tensor in xywh/ltrb format. - * \param [out] pDst The output tensor in \ref VX_TYPE_UINT8 or \ref VX_TYPE_FLOAT32 or \ref VX_TYPE_FLOAT16 or \ref VX_TYPE_INT8 format data. - * \param [in] pDstDims The input tensor of batch size in unsigned int containing the roi values for the output tensor in xywh/ltrb format. + * \param [in] pSrc The input tensor in \ref VX_TYPE_FLOAT32 format data. + * \param[in] pSrcDims The input tensor of batch size in unsigned int containing the roi values for the input. + * \param [out] pDst The output tensor in \ref VX_TYPE_FLOAT32 format data. + * \param[in] pDstDims The input tensor of batch size in unsigned int containing the roi values for the output. + * \param[in] cutOffDB The input scalar in \ref VX_TYPE_FLOAT32 format containing minimum or cut-off ratio in dB + * \param[in] multiplier The input scalar in \ref VX_TYPE_FLOAT32 format containing factor by which the logarithm is multiplied + * \param[in] referenceMagnitude The input scalar in \ref VX_TYPE_FLOAT32 format containing Reference magnitude which if not provided uses maximum value of input as reference * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus. 
*/ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcDims, vx_tensor pDst, vx_tensor pDstDims, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude); diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp index 48bff843f..0ce03d1ab 100644 --- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp +++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp @@ -2588,13 +2588,13 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_node node = NULL; vx_context context = vxGetContext((vx_reference)graph); if (vxGetStatus((vx_reference)context) == VX_SUCCESS) { - vx_uint32 dev_type = getGraphAffinity(graph); - vx_scalar devType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &dev_type); + vx_uint32 devType = getGraphAffinity(graph); + vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType); vx_reference params[] = { (vx_reference)pSrc, (vx_reference)pDst, (vx_reference)srcRoi, - (vx_reference)devType}; + (vx_reference)deviceType}; node = createNode(graph, VX_KERNEL_RPP_DOWNMIX, params, 4); } return node; diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp index a4d9c27c4..f2f2e6f4f 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp @@ -25,7 +25,7 @@ THE SOFTWARE. 
struct DownmixLocalData { vxRppHandle *handle; - Rpp32u device_type; + Rpp32u deviceType; RppPtr_t pSrc; RppPtr_t pDst; vx_int32 *srcDims; @@ -40,12 +40,12 @@ struct DownmixLocalData { static vx_status VX_CALLBACK refreshDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num, DownmixLocalData *data) { vx_status status = VX_SUCCESS; void *roi_tensor_ptr_src; - if (data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_HIP + if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_OPENCL || ENABLE_HIP return VX_ERROR_NOT_IMPLEMENTED; - } #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) { + } + if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); @@ -63,7 +63,7 @@ static vx_status VX_CALLBACK validateDownmix(vx_node node, const vx_reference pa vx_enum scalar_type; STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); if (scalar_type != VX_TYPE_UINT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #4 type=%d (must be size)\n", scalar_type); + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); // Check for input parameters size_t num_tensor_dims; @@ -93,12 +93,12 @@ static vx_status VX_CALLBACK processDownmix(vx_node node, const vx_reference *pa vx_status return_status = VX_SUCCESS; DownmixLocalData *data = NULL; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - if (data->device_type == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_HIP - return_status = VX_ERROR_NOT_IMPLEMENTED; - } + if (data->deviceType == 
AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_OPENCL || ENABLE_HIP + return VX_ERROR_NOT_IMPLEMENTED; #endif - if (data->device_type == AGO_TARGET_AFFINITY_CPU) { + } + if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { refreshDownmix(node, parameters, num, data); rpp_status = rppt_down_mixing_host((float *)data->pSrc, data->pSrcDesc, (float *)data->pDst, data->pDstDesc, (Rpp32s *)data->srcDims, false, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; @@ -111,8 +111,7 @@ static vx_status VX_CALLBACK initializeDownmix(vx_node node, const vx_reference memset(data, 0, sizeof(DownmixLocalData)); vx_enum input_tensor_datatype, output_tensor_datatype; - - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->device_type, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); // Querying for input tensor data->pSrcDesc = new RpptDesc; @@ -135,8 +134,7 @@ static vx_status VX_CALLBACK initializeDownmix(vx_node node, const vx_reference data->srcDims = new vx_int32[data->pSrcDesc->n * 2]; refreshDownmix(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->device_type)); - + STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); return VX_SUCCESS; } @@ -144,7 +142,7 @@ static vx_status VX_CALLBACK initializeDownmix(vx_node node, const vx_reference static vx_status VX_CALLBACK uninitializeDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num) { DownmixLocalData *data; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->device_type)); + STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); delete (data->srcDims); 
delete (data); return VX_SUCCESS; @@ -188,7 +186,7 @@ vx_status Downmix_Register(vx_context context) { if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); #else - vx_bool enableBufferAccess = vx_false_e; + vx_bool enableBufferAccess = vx_false_e; #endif amd_kernel_query_target_support_f query_target_support_f = query_target_support; @@ -197,7 +195,6 @@ vx_status Downmix_Register(vx_context context) { PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); - // PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); } From ebe89331e294b2bdb2ad0b6d1bc517fdf5da1a60 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Mon, 25 Mar 2024 13:22:57 +0000 Subject: [PATCH 06/16] Resolving review comments --- amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp index f2f2e6f4f..079a5d9f1 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp @@ -31,8 +31,6 @@ struct DownmixLocalData { vx_int32 *srcDims; RpptDescPtr pSrcDesc; RpptDescPtr pDstDesc; - RpptDesc srcDesc; - RpptDesc dstDesc; size_t inputTensorDims[RPP_MAX_TENSOR_DIMS]; size_t outputTensorDims[RPP_MAX_TENSOR_DIMS]; }; @@ -44,8 +42,7 @@ 
static vx_status VX_CALLBACK refreshDownmix(vx_node node, const vx_reference *pa #if ENABLE_OPENCL || ENABLE_HIP return VX_ERROR_NOT_IMPLEMENTED; #endif - } - if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); @@ -97,8 +94,7 @@ static vx_status VX_CALLBACK processDownmix(vx_node node, const vx_reference *pa #if ENABLE_OPENCL || ENABLE_HIP return VX_ERROR_NOT_IMPLEMENTED; #endif - } - if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { refreshDownmix(node, parameters, num, data); rpp_status = rppt_down_mixing_host((float *)data->pSrc, data->pSrcDesc, (float *)data->pDst, data->pDstDesc, (Rpp32s *)data->srcDims, false, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; From a446bff65dac4268cf4c4631a74a47e3438eeb45 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Mon, 25 Mar 2024 15:10:59 +0000 Subject: [PATCH 07/16] Removing dstROI from to_decibels openvx augmentation --- .../amd_rpp/include/vx_ext_rpp.h | 3 +- .../amd_rpp/source/kernel_rpp.cpp | 5 ++- .../amd_rpp/source/tensor/ToDecibels.cpp | 33 ++++++++----------- 3 files changed, 16 insertions(+), 25 deletions(-) diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h index ee93297b2..2773fd01f 100644 --- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h @@ -1912,13 +1912,12 @@ extern "C" * \param [in] pSrc The input tensor in \ref VX_TYPE_FLOAT32 format data. 
* \param[in] pSrcDims The input tensor of batch size in unsigned int containing the roi values for the input. * \param [out] pDst The output tensor in \ref VX_TYPE_FLOAT32 format data. - * \param[in] pDstDims The input tensor of batch size in unsigned int containing the roi values for the output. * \param[in] cutOffDB The input scalar in \ref VX_TYPE_FLOAT32 format containing minimum or cut-off ratio in dB * \param[in] multiplier The input scalar in \ref VX_TYPE_FLOAT32 format containing factor by which the logarithm is multiplied * \param[in] referenceMagnitude The input scalar in \ref VX_TYPE_FLOAT32 format containing Reference magnitude which if not provided uses maximum value of input as reference * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus. */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcDims, vx_tensor pDst, vx_tensor pDstDims, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude); + SHARED_PUBLIC vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcDims, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude); #ifdef __cplusplus } diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp index 0ce03d1ab..3c4f4a1f2 100644 --- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp +++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp @@ -2600,7 +2600,7 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, return node; } -VX_API_ENTRY vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcDims, vx_tensor pDst, vx_tensor pDstDims, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude) { +VX_API_ENTRY vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcDims, vx_tensor 
pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude) { vx_node node = NULL; vx_context context = vxGetContext((vx_reference)graph); if (vxGetStatus((vx_reference)context) == VX_SUCCESS) { @@ -2610,12 +2610,11 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pS (vx_reference)pSrc, (vx_reference)pSrcDims, (vx_reference)pDst, - (vx_reference)pDstDims, (vx_reference)cutOffDB, (vx_reference)multiplier, (vx_reference)referenceMagnitude, (vx_reference)deviceType}; - node = createNode(graph, VX_KERNEL_RPP_TODECIBELS, params, 8); + node = createNode(graph, VX_KERNEL_RPP_TODECIBELS, params, 7); } return node; } diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp index 8059c5afa..8c1ce4329 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp @@ -37,8 +37,7 @@ struct ToDecibelsLocalData { size_t outputTensorDims[RPP_MAX_TENSOR_DIMS]; }; -void copy_src_dims_and_update_dst_roi(ToDecibelsLocalData *data, RpptROI *src_roi, RpptROI *dst_roi) { - memcpy(dst_roi, src_roi, data->pSrcDesc->n * sizeof(RpptROI)); +void copy_src_dims_and_update_dst_roi(ToDecibelsLocalData *data, RpptROI *src_roi) { for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { data->pSrcDims[i].width = src_roi[i].xywhROI.roiWidth; data->pSrcDims[i].height = src_roi[i].xywhROI.roiHeight; @@ -47,40 +46,35 @@ void copy_src_dims_and_update_dst_roi(ToDecibelsLocalData *data, RpptROI *src_ro static vx_status VX_CALLBACK refreshToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num, ToDecibelsLocalData *data) { vx_status status = VX_SUCCESS; - void *roi_tensor_ptr_src, *roi_tensor_ptr_dst; + void *roi_tensor_ptr_src; if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { #if ENABLE_OPENCL return VX_ERROR_NOT_IMPLEMENTED; #elif ENABLE_HIP - 
STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc))); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst))); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr_dst, sizeof(roi_tensor_ptr_dst))); + return VX_ERROR_NOT_IMPLEMENTED; #endif } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_dst, sizeof(roi_tensor_ptr_dst))); } RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); - RpptROI *dst_roi = reinterpret_cast(roi_tensor_ptr_dst); - copy_src_dims_and_update_dst_roi(data, src_roi, dst_roi); + copy_src_dims_and_update_dst_roi(data, src_roi); return status; } static vx_status VX_CALLBACK validateToDecibels(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { vx_status status = VX_SUCCESS; vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_FLOAT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); if (scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: 
#4 type=%d (must be size)\n", scalar_type); STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); if (scalar_type != VX_TYPE_FLOAT32) return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); - STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); - if (scalar_type != VX_TYPE_FLOAT32) - return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #6 type=%d (must be size)\n", scalar_type); // Check for input parameters size_t num_tensor_dims; @@ -128,10 +122,10 @@ static vx_status VX_CALLBACK initializeToDecibels(vx_node node, const vx_referen memset(data, 0, sizeof(ToDecibelsLocalData)); vx_enum input_tensor_datatype, output_tensor_datatype; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->cutOffDB)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->multiplier)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->referenceMagnitude)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[7], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->cutOffDB)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->multiplier)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->referenceMagnitude)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); // Querying for input tensor data->pSrcDesc = new RpptDesc; @@ -192,7 +186,7 @@ vx_status ToDecibels_Register(vx_context context) { vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ToDecibels", VX_KERNEL_RPP_TODECIBELS, processToDecibels, - 8, + 7, validateToDecibels, initializeToDecibels, uninitializeToDecibels); @@ -213,11 +207,10 @@ vx_status ToDecibels_Register(vx_context context) { 
PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); - PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); } if (status != VX_SUCCESS) { From 070bd8224417ab181f05d6e58b4accece27a4e63 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Mon, 25 Mar 2024 18:10:06 +0000 Subject: [PATCH 08/16] Resolving review comments --- .../amd_rpp/include/kernels_rpp.h | 2 +- .../amd_rpp/include/vx_ext_rpp.h | 4 +- .../amd_rpp/source/kernel_rpp.cpp | 8 ++-- .../amd_rpp/source/tensor/Downmix.cpp | 18 +++++---- .../amd_rpp/source/tensor/Spectrogram.cpp | 2 +- .../amd_rpp/source/tensor/ToDecibels.cpp | 38 ++++++++----------- .../source/api/rocal_api_augmentation.cpp | 1 - 7 files changed, 33 insertions(+), 40 deletions(-) diff --git a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h index 5a367a3f1..7bcce6b02 100644 --- a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h @@ -151,7 +151,7 @@ extern "C" VX_KERNEL_RPP_PREEMPHASISFILTER = VX_KERNEL_BASE(VX_ID_AMD, 
VX_LIBRARY_RPP) + 0x74, VX_KERNEL_RPP_SPECTROGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75, VX_KERNEL_RPP_DOWNMIX = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x76, - VX_KERNEL_RPP_TODECIBELS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x77, + VX_KERNEL_RPP_TODECIBELS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x77 }; #ifdef __cplusplus diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h index 2773fd01f..1ed72df17 100644 --- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h @@ -1910,14 +1910,14 @@ extern "C" * \ingroup group_amd_rpp * \param [in] graph The handle to the graph. * \param [in] pSrc The input tensor in \ref VX_TYPE_FLOAT32 format data. - * \param[in] pSrcDims The input tensor of batch size in unsigned int containing the roi values for the input. + * \param[in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input. * \param [out] pDst The output tensor in \ref VX_TYPE_FLOAT32 format data. * \param[in] cutOffDB The input scalar in \ref VX_TYPE_FLOAT32 format containing minimum or cut-off ratio in dB * \param[in] multiplier The input scalar in \ref VX_TYPE_FLOAT32 format containing factor by which the logarithm is multiplied * \param[in] referenceMagnitude The input scalar in \ref VX_TYPE_FLOAT32 format containing Reference magnitude which if not provided uses maximum value of input as reference * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus. 
*/ - SHARED_PUBLIC vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcDims, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude); + SHARED_PUBLIC vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude); #ifdef __cplusplus } diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp index b578cfe33..14d4376e2 100644 --- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp +++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp @@ -2584,7 +2584,7 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor p return node; } -VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor srcRoi) { +VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor pSrcRoi) { vx_node node = NULL; vx_context context = vxGetContext((vx_reference)graph); if (vxGetStatus((vx_reference)context) == VX_SUCCESS) { @@ -2593,14 +2593,14 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_reference params[] = { (vx_reference)pSrc, (vx_reference)pDst, - (vx_reference)srcRoi, + (vx_reference)pSrcRoi, (vx_reference)deviceType}; node = createNode(graph, VX_KERNEL_RPP_DOWNMIX, params, 4); } return node; } -VX_API_ENTRY vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcDims, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude) { +VX_API_ENTRY vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude) { vx_node node = NULL; vx_context context = vxGetContext((vx_reference)graph); if 
(vxGetStatus((vx_reference)context) == VX_SUCCESS) { @@ -2608,7 +2608,7 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pS vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devType); vx_reference params[] = { (vx_reference)pSrc, - (vx_reference)pSrcDims, + (vx_reference)pSrcRoi, (vx_reference)pDst, (vx_reference)cutOffDB, (vx_reference)multiplier, diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp index 079a5d9f1..8ddb2dad0 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp @@ -28,7 +28,7 @@ struct DownmixLocalData { Rpp32u deviceType; RppPtr_t pSrc; RppPtr_t pDst; - vx_int32 *srcDims; + vx_int32 *psrcRoi; RpptDescPtr pSrcDesc; RpptDescPtr pDstDesc; size_t inputTensorDims[RPP_MAX_TENSOR_DIMS]; @@ -49,8 +49,8 @@ static vx_status VX_CALLBACK refreshDownmix(vx_node node, const vx_reference *pa } RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); for (int n = 0; n < data->inputTensorDims[0]; n++) { - data->srcDims[n * 2] = src_roi[n].xywhROI.roiWidth; - data->srcDims[n * 2 + 1] = src_roi[n].xywhROI.roiHeight; + data->psrcRoi[n * 2] = src_roi[n].xywhROI.roiWidth; + data->psrcRoi[n * 2 + 1] = src_roi[n].xywhROI.roiHeight; } return status; } @@ -96,7 +96,7 @@ static vx_status VX_CALLBACK processDownmix(vx_node node, const vx_reference *pa #endif } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { refreshDownmix(node, parameters, num, data); - rpp_status = rppt_down_mixing_host((float *)data->pSrc, data->pSrcDesc, (float *)data->pDst, data->pDstDesc, (Rpp32s *)data->srcDims, false, data->handle->rppHandle); + rpp_status = rppt_down_mixing_host((float *)data->pSrc, data->pSrcDesc, (float *)data->pDst, data->pDstDesc, (Rpp32s *)data->psrcRoi, false, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; } return return_status; @@ -127,7 +127,7 @@ static vx_status VX_CALLBACK initializeDownmix(vx_node node, const vx_reference data->pDstDesc->offsetInBytes = 0; fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims); - data->srcDims = new vx_int32[data->pSrcDesc->n * 2]; + data->psrcRoi = new vx_int32[data->pSrcDesc->n * 2]; refreshDownmix(node, parameters, num, data); STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); @@ -139,13 +139,15 @@ static vx_status VX_CALLBACK uninitializeDownmix(vx_node node, const vx_referenc DownmixLocalData *data; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); - delete (data->srcDims); - delete (data); + delete[] data->psrcRoi; + delete data->pSrcDesc; + delete data->pDstDesc; + delete data; return VX_SUCCESS; } //! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Spectrogram.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Spectrogram.cpp index e7809b276..30cff131e 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Spectrogram.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Spectrogram.cpp @@ -63,7 +63,7 @@ static vx_status VX_CALLBACK refreshSpectrogram(vx_node node, const vx_reference void *roi_tensor_ptr_src, *roi_tensor_ptr_dst; if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { #if ENABLE_OPENCL || ENABLE_HIP - status = VX_ERROR_NOT_IMPLEMENTED; + return_status = VX_ERROR_NOT_IMPLEMENTED; #endif } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp index 8c1ce4329..a2924ddd0 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp @@ -1,5 +1,5 @@ /* -Copyright (c) 2019 - 2023 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -37,20 +37,11 @@ struct ToDecibelsLocalData { size_t outputTensorDims[RPP_MAX_TENSOR_DIMS]; }; -void copy_src_dims_and_update_dst_roi(ToDecibelsLocalData *data, RpptROI *src_roi) { - for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { - data->pSrcDims[i].width = src_roi[i].xywhROI.roiWidth; - data->pSrcDims[i].height = src_roi[i].xywhROI.roiHeight; - } -} - static vx_status VX_CALLBACK refreshToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num, ToDecibelsLocalData *data) { vx_status status = VX_SUCCESS; void *roi_tensor_ptr_src; if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - return VX_ERROR_NOT_IMPLEMENTED; -#elif ENABLE_HIP +#if ENABLE_OPENCL || ENABLE_HIP return VX_ERROR_NOT_IMPLEMENTED; #endif } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { @@ -59,7 +50,10 @@ static vx_status VX_CALLBACK refreshToDecibels(vx_node node, const vx_reference STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); } RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); - copy_src_dims_and_update_dst_roi(data, src_roi); + for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { + data->pSrcDims[i].width = src_roi[i].xywhROI.roiWidth; + data->pSrcDims[i].height = src_roi[i].xywhROI.roiHeight; + } return status; } @@ -79,14 +73,14 @@ static vx_status VX_CALLBACK validateToDecibels(vx_node node, const vx_reference // Check for input parameters size_t num_tensor_dims; STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); - if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: MelFilterBank: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims); + if (num_tensor_dims < 3) return 
ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ToDecibels: tensor: #0 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims); // Check for output parameters vx_uint8 tensor_fixed_point_position; size_t tensor_dims[RPP_MAX_TENSOR_DIMS]; vx_enum tensor_datatype; STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); - if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: MelFilterBank: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims); + if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ToDecibels: tensor: #2 dimensions=%lu (must be greater than or equal to 4)\n", num_tensor_dims); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype))); @@ -105,9 +99,7 @@ static vx_status VX_CALLBACK processToDecibels(vx_node node, const vx_reference STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); refreshToDecibels(node, parameters, num, data); if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { -#if ENABLE_OPENCL - return_status = VX_ERROR_NOT_IMPLEMENTED; -#elif ENABLE_HIP +#if ENABLE_OPENCL || ENABLE_HIP return_status = VX_ERROR_NOT_IMPLEMENTED; #endif } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { @@ -145,7 +137,7 @@ static vx_status VX_CALLBACK initializeToDecibels(vx_node node, const vx_referen data->pDstDesc->offsetInBytes = 0; fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims); - data->pSrcDims = static_cast(calloc(data->pSrcDesc->n, sizeof(RpptImagePatch))); + data->pSrcDims = new RpptImagePatch[data->pSrcDesc->n]; refreshToDecibels(node, parameters, num, data); STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); 
STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); @@ -155,16 +147,16 @@ static vx_status VX_CALLBACK initializeToDecibels(vx_node node, const vx_referen static vx_status VX_CALLBACK uninitializeToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num) { ToDecibelsLocalData *data; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - if (data->pSrcDims != nullptr) free(data->pSrcDims); - delete (data->pSrcDesc); - delete (data->pDstDesc); + delete[] data->pSrcDims; + delete data->pSrcDesc; + delete data->pDstDesc; STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); - delete (data); + delete data; return VX_SUCCESS; } //! \brief The kernel target support callback. -// TODO::currently the node is setting the same affinity as context. This needs to change when we have hubrid modes in the same graph +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) diff --git a/rocAL/rocAL/source/api/rocal_api_augmentation.cpp b/rocAL/rocAL/source/api/rocal_api_augmentation.cpp index 354e21a43..2b2a71dc3 100644 --- a/rocAL/rocAL/source/api/rocal_api_augmentation.cpp +++ b/rocAL/rocAL/source/api/rocal_api_augmentation.cpp @@ -2070,4 +2070,3 @@ rocalNop( } return output; } - From 3be53bfed21e3c1b2bef0f38be0fd7fc7f1e5fe6 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Tue, 16 Apr 2024 08:37:46 +0000 Subject: [PATCH 09/16] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c2105e2e..0766ee4bd 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ Documentation for MIVisionX is available at * Doxygen support for API documentation * Support for PreEmphasis Filter augmentation in openVX extensions * Support for Spectrogram augmentation in openVX extensions +* Support for Downmix and ToDecibels augmentations in openVX extensions ### Optimizations From 81c5533224ad85f0043a036b39f63c17cc726c81 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Mon, 13 May 2024 07:22:12 +0000 Subject: [PATCH 10/16] Resolving review comments --- .../amd_rpp/source/tensor/Downmix.cpp | 70 ++++++++-------- .../amd_rpp/source/tensor/ToDecibels.cpp | 82 ++++++++++--------- 2 files changed, 80 insertions(+), 72 deletions(-) diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp index 8ddb2dad0..c90296496 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp @@ -104,45 +104,49 @@ static vx_status VX_CALLBACK processDownmix(vx_node node, const vx_reference *pa static vx_status VX_CALLBACK initializeDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num) { DownmixLocalData *data = new DownmixLocalData; - memset(data, 0, sizeof(DownmixLocalData)); - - vx_enum input_tensor_datatype, output_tensor_datatype; - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - - // Querying for input tensor - data->pSrcDesc = new RpptDesc; - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims))); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims)); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_datatype, sizeof(input_tensor_datatype))); - 
data->pSrcDesc->dataType = getRpptDataType(input_tensor_datatype); - data->pSrcDesc->offsetInBytes = 0; - fillAudioDescriptionPtrFromDims(data->pSrcDesc, data->inputTensorDims); - - // Querying for output tensor - data->pDstDesc = new RpptDesc; - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims))); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims)); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &output_tensor_datatype, sizeof(output_tensor_datatype))); - data->pDstDesc->dataType = getRpptDataType(output_tensor_datatype); - data->pDstDesc->offsetInBytes = 0; - fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims); - - data->psrcRoi = new vx_int32[data->pSrcDesc->n * 2]; - - refreshDownmix(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + if (data) { + memset(data, 0, sizeof(DownmixLocalData)); + + vx_enum input_tensor_datatype, output_tensor_datatype; + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + + // Querying for input tensor + data->pSrcDesc = new RpptDesc; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims)); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_datatype, sizeof(input_tensor_datatype))); + data->pSrcDesc->dataType = getRpptDataType(input_tensor_datatype); + 
data->pSrcDesc->offsetInBytes = 0; + fillAudioDescriptionPtrFromDims(data->pSrcDesc, data->inputTensorDims); + + // Querying for output tensor + data->pDstDesc = new RpptDesc; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims)); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &output_tensor_datatype, sizeof(output_tensor_datatype))); + data->pDstDesc->dataType = getRpptDataType(output_tensor_datatype); + data->pDstDesc->offsetInBytes = 0; + fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims); + + data->psrcRoi = new vx_int32[data->pSrcDesc->n * 2]; + + refreshDownmix(node, parameters, num, data); + STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; + } else { + return VX_FAILURE; + } } static vx_status VX_CALLBACK uninitializeDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num) { DownmixLocalData *data; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + if (data->psrcRoi) delete[] data->psrcRoi; + if (data->pSrcDesc) delete data->pSrcDesc; + if (data->pDstDesc) delete data->pDstDesc; STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); - delete[] data->psrcRoi; - delete data->pSrcDesc; - delete data->pDstDesc; - delete data; + if (data) delete data; return VX_SUCCESS; } diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp index a2924ddd0..109241e83 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp +++ 
b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp @@ -50,10 +50,10 @@ static vx_status VX_CALLBACK refreshToDecibels(vx_node node, const vx_reference STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); } RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); - for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { - data->pSrcDims[i].width = src_roi[i].xywhROI.roiWidth; - data->pSrcDims[i].height = src_roi[i].xywhROI.roiHeight; - } + for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { + data->pSrcDims[i].width = src_roi[i].xywhROI.roiWidth; + data->pSrcDims[i].height = src_roi[i].xywhROI.roiHeight; + } return status; } @@ -111,47 +111,51 @@ static vx_status VX_CALLBACK processToDecibels(vx_node node, const vx_reference static vx_status VX_CALLBACK initializeToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num) { ToDecibelsLocalData *data = new ToDecibelsLocalData; - memset(data, 0, sizeof(ToDecibelsLocalData)); - - vx_enum input_tensor_datatype, output_tensor_datatype; - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->cutOffDB)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->multiplier)); - STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->referenceMagnitude)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); - - // Querying for input tensor - data->pSrcDesc = new RpptDesc; - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims))); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims)); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_datatype, sizeof(input_tensor_datatype))); - data->pSrcDesc->dataType = 
getRpptDataType(input_tensor_datatype); - data->pSrcDesc->offsetInBytes = 0; - fillAudioDescriptionPtrFromDims(data->pSrcDesc, data->inputTensorDims); - - // Querying for output tensor - data->pDstDesc = new RpptDesc; - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims))); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims)); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_datatype, sizeof(output_tensor_datatype))); - data->pDstDesc->dataType = getRpptDataType(output_tensor_datatype); - data->pDstDesc->offsetInBytes = 0; - fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims); - - data->pSrcDims = new RpptImagePatch[data->pSrcDesc->n]; - refreshToDecibels(node, parameters, num, data); - STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); - STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - return VX_SUCCESS; + if (data) { + memset(data, 0, sizeof(ToDecibelsLocalData)); + + vx_enum input_tensor_datatype, output_tensor_datatype; + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->cutOffDB)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->multiplier)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->referenceMagnitude)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + + // Querying for input tensor + data->pSrcDesc = new RpptDesc; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * 
data->pSrcDesc->numDims)); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_datatype, sizeof(input_tensor_datatype))); + data->pSrcDesc->dataType = getRpptDataType(input_tensor_datatype); + data->pSrcDesc->offsetInBytes = 0; + fillAudioDescriptionPtrFromDims(data->pSrcDesc, data->inputTensorDims); + + // Querying for output tensor + data->pDstDesc = new RpptDesc; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims)); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_datatype, sizeof(output_tensor_datatype))); + data->pDstDesc->dataType = getRpptDataType(output_tensor_datatype); + data->pDstDesc->offsetInBytes = 0; + fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims); + + data->pSrcDims = new RpptImagePatch[data->pSrcDesc->n]; + refreshToDecibels(node, parameters, num, data); + STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; + } else { + return VX_FAILURE; + } } static vx_status VX_CALLBACK uninitializeToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num) { ToDecibelsLocalData *data; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - delete[] data->pSrcDims; - delete data->pSrcDesc; - delete data->pDstDesc; + if (data->pSrcDims) delete[] data->pSrcDims; + if (data->pSrcDesc) delete data->pSrcDesc; + if (data->pDstDesc) delete data->pDstDesc; STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); - delete data; + if (data) delete data; return VX_SUCCESS; } From 
e0883dd66d2eebeec7ce07a52eb87cac2b7cfb5c Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Tue, 14 May 2024 15:08:08 +0000 Subject: [PATCH 11/16] Resolving review comments --- .../amd_rpp/include/internal_rpp.h | 12 ++++++++- .../amd_rpp/source/kernel_rpp.cpp | 27 ++++--------------- .../amd_rpp/source/tensor/Downmix.cpp | 19 +++++++------ .../amd_rpp/source/tensor/ToDecibels.cpp | 19 +++++++------ 4 files changed, 38 insertions(+), 39 deletions(-) diff --git a/amd_openvx_extensions/amd_rpp/include/internal_rpp.h b/amd_openvx_extensions/amd_rpp/include/internal_rpp.h index ee9ea06c9..afd1aaf5b 100644 --- a/amd_openvx_extensions/amd_rpp/include/internal_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/internal_rpp.h @@ -43,6 +43,7 @@ THE SOFTWARE. #include #include #include +#include using namespace std; @@ -74,6 +75,16 @@ enum vxTensorLayout { VX_NTF = 6 // Time major, Used for Spectrogram/MelFilterBank }; +const std::map TENSOR_LAYOUT_MAPPING = { + {vxTensorLayout::VX_NHWC, RpptLayout::NHWC}, + {vxTensorLayout::VX_NCHW, RpptLayout::NCHW}, + {vxTensorLayout::VX_NFHWC, RpptLayout::NHWC}, + {vxTensorLayout::VX_NFCHW, RpptLayout::NCHW}, + {vxTensorLayout::VX_NHW, RpptLayout::NHW}, + {vxTensorLayout::VX_NFT, RpptLayout::NFT}, + {vxTensorLayout::VX_NTF, RpptLayout::NTF} +}; + //! 
Brief The utility functions vx_node createNode(vx_graph graph, vx_enum kernelEnum, vx_reference params[], vx_uint32 num); vx_status createRPPHandle(vx_node node, vxRppHandle ** pHandle, Rpp32u batchSize, Rpp32u deviceType); @@ -81,7 +92,6 @@ vx_status releaseRPPHandle(vx_node node, vxRppHandle * handle, Rpp32u deviceType void fillDescriptionPtrfromDims(RpptDescPtr &descPtr, vxTensorLayout layout, size_t *tensorDims); void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *tensorDims, vxTensorLayout layout = vxTensorLayout::VX_NHW); RpptDataType getRpptDataType(vx_enum dataType); -RpptLayout getRpptLayout(vxTensorLayout layout); class Kernellist { diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp index 07c17353a..0a1f3de0a 100644 --- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp +++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp @@ -2632,27 +2632,6 @@ RpptDataType getRpptDataType(vx_enum vxDataType) { } } -RpptLayout getRpptLayout(vxTensorLayout layout) { - switch(layout) { - case vxTensorLayout::VX_NHWC: - return RpptLayout::NHWC; - case vxTensorLayout::VX_NCHW: - return RpptLayout::NCHW; - case vxTensorLayout::VX_NFHWC: - return RpptLayout::NHWC; - case vxTensorLayout::VX_NFCHW: - return RpptLayout::NCHW; - case vxTensorLayout::VX_NHW: - return RpptLayout::NHW; - case vxTensorLayout::VX_NFT: - return RpptLayout::NFT; - case vxTensorLayout::VX_NTF: - return RpptLayout::NTF; - default: - throw std::runtime_error("Invalid layout"); - } -} - void fillDescriptionPtrfromDims(RpptDescPtr &descPtr, vxTensorLayout layout, size_t *tensorDims) { switch(layout) { case vxTensorLayout::VX_NHWC: { @@ -2719,7 +2698,11 @@ void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *maxTensorDims descPtr->strides.wStride = descPtr->c; descPtr->strides.cStride = 1; descPtr->numDims = 4; - descPtr->layout = getRpptLayout(layout); + if(TENSOR_LAYOUT_MAPPING.find(layout) != 
TENSOR_LAYOUT_MAPPING.end()) { + descPtr->layout = TENSOR_LAYOUT_MAPPING.at(layout); + } else { + throw std::runtime_error("Invalid layout"); + } } // utility functions diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp index c90296496..86d0da58c 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp @@ -37,20 +37,22 @@ struct DownmixLocalData { static vx_status VX_CALLBACK refreshDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num, DownmixLocalData *data) { vx_status status = VX_SUCCESS; - void *roi_tensor_ptr_src; if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { #if ENABLE_OPENCL || ENABLE_HIP return VX_ERROR_NOT_IMPLEMENTED; #endif - } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + } + if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + void *roi_tensor_ptr_src; STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); - } - RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); - for (int n = 0; n < data->inputTensorDims[0]; n++) { - data->psrcRoi[n * 2] = src_roi[n].xywhROI.roiWidth; - data->psrcRoi[n * 2 + 1] = src_roi[n].xywhROI.roiHeight; + RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); + for (int n = 0; n < data->inputTensorDims[0]; n++) { + data->psrcRoi[n * 2] = src_roi[n].xywhROI.roiWidth; + data->psrcRoi[n * 2 + 1] = src_roi[n].xywhROI.roiHeight; + } + return status; } return status; } @@ -94,7 +96,8 @@ static vx_status VX_CALLBACK processDownmix(vx_node node, const vx_reference *pa #if ENABLE_OPENCL || ENABLE_HIP return VX_ERROR_NOT_IMPLEMENTED; #endif - } 
else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + } + if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { refreshDownmix(node, parameters, num, data); rpp_status = rppt_down_mixing_host((float *)data->pSrc, data->pSrcDesc, (float *)data->pDst, data->pDstDesc, (Rpp32s *)data->psrcRoi, false, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp index 109241e83..e54efc60a 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp @@ -39,20 +39,22 @@ struct ToDecibelsLocalData { static vx_status VX_CALLBACK refreshToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num, ToDecibelsLocalData *data) { vx_status status = VX_SUCCESS; - void *roi_tensor_ptr_src; if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { #if ENABLE_OPENCL || ENABLE_HIP return VX_ERROR_NOT_IMPLEMENTED; #endif - } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + } + if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + void *roi_tensor_ptr_src; STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); - } - RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); - for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { - data->pSrcDims[i].width = src_roi[i].xywhROI.roiWidth; - data->pSrcDims[i].height = src_roi[i].xywhROI.roiHeight; + RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); + for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { + data->pSrcDims[i].width = src_roi[i].xywhROI.roiWidth; + 
data->pSrcDims[i].height = src_roi[i].xywhROI.roiHeight; + } + return status; } return status; } @@ -102,7 +104,8 @@ static vx_status VX_CALLBACK processToDecibels(vx_node node, const vx_reference #if ENABLE_OPENCL || ENABLE_HIP return_status = VX_ERROR_NOT_IMPLEMENTED; #endif - } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + } + if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { rpp_status = rppt_to_decibels_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSrcDims, data->cutOffDB, data->multiplier, data->referenceMagnitude, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; } From a8491251687cacef23790e75d64e0da8a057fec9 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Wed, 15 May 2024 07:50:05 +0000 Subject: [PATCH 12/16] Minor changes --- .../amd_rpp/include/vx_ext_rpp.h | 4 +++- .../amd_rpp/source/kernel_rpp.cpp | 6 ++++-- .../amd_rpp/source/tensor/Downmix.cpp | 13 ++++++------- .../amd_rpp/source/tensor/ToDecibels.cpp | 19 +++++++++++++++++-- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h index bf5365392..c93ae8935 100644 --- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h @@ -1915,9 +1915,11 @@ extern "C" * \param[in] cutOffDB The input scalar in \ref VX_TYPE_FLOAT32 format containing minimum or cut-off ratio in dB * \param[in] multiplier The input scalar in \ref VX_TYPE_FLOAT32 format containing factor by which the logarithm is multiplied * \param[in] referenceMagnitude The input scalar in \ref VX_TYPE_FLOAT32 format containing Reference magnitude which if not provided uses maximum value of input as reference + * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor. 
+ * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor. * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus. */ - SHARED_PUBLIC vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude); + SHARED_PUBLIC vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude, vx_scalar inputLayout, vx_scalar outputLayout); #ifdef __cplusplus } diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp index 0a1f3de0a..b242e7de0 100644 --- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp +++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp @@ -2600,7 +2600,7 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, return node; } -VX_API_ENTRY vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude) { +VX_API_ENTRY vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude, vx_scalar inputLayout, vx_scalar outputLayout) { vx_node node = NULL; vx_context context = vxGetContext((vx_reference)graph); if (vxGetStatus((vx_reference)context) == VX_SUCCESS) { @@ -2613,8 +2613,10 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pS (vx_reference)cutOffDB, (vx_reference)multiplier, (vx_reference)referenceMagnitude, + (vx_reference)inputLayout, + (vx_reference)outputLayout, (vx_reference)deviceType}; - node = createNode(graph, VX_KERNEL_RPP_TODECIBELS, params, 7); + 
node = createNode(graph, VX_KERNEL_RPP_TODECIBELS, params, 9); } return node; } diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp index 86d0da58c..6ca9c7ede 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp @@ -28,7 +28,7 @@ struct DownmixLocalData { Rpp32u deviceType; RppPtr_t pSrc; RppPtr_t pDst; - vx_int32 *psrcRoi; + vx_int32 *pSrcRoi; RpptDescPtr pSrcDesc; RpptDescPtr pDstDesc; size_t inputTensorDims[RPP_MAX_TENSOR_DIMS]; @@ -49,8 +49,8 @@ static vx_status VX_CALLBACK refreshDownmix(vx_node node, const vx_reference *pa STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); for (int n = 0; n < data->inputTensorDims[0]; n++) { - data->psrcRoi[n * 2] = src_roi[n].xywhROI.roiWidth; - data->psrcRoi[n * 2 + 1] = src_roi[n].xywhROI.roiHeight; + data->pSrcRoi[n * 2] = src_roi[n].xywhROI.roiWidth; + data->pSrcRoi[n * 2 + 1] = src_roi[n].xywhROI.roiHeight; } return status; } @@ -99,7 +99,7 @@ static vx_status VX_CALLBACK processDownmix(vx_node node, const vx_reference *pa } if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { refreshDownmix(node, parameters, num, data); - rpp_status = rppt_down_mixing_host((float *)data->pSrc, data->pSrcDesc, (float *)data->pDst, data->pDstDesc, (Rpp32s *)data->psrcRoi, false, data->handle->rppHandle); + rpp_status = rppt_down_mixing_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, (Rpp32s *)data->pSrcRoi, false, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; } return return_status; @@ -131,8 +131,7 @@ static vx_status VX_CALLBACK initializeDownmix(vx_node node, const vx_reference data->pDstDesc->offsetInBytes = 0; fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims); - data->psrcRoi = new vx_int32[data->pSrcDesc->n * 2]; - + data->pSrcRoi = new vx_int32[data->pSrcDesc->n * 2]; refreshDownmix(node, parameters, num, data); STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); @@ -145,7 +144,7 @@ static vx_status VX_CALLBACK initializeDownmix(vx_node node, const vx_reference static vx_status VX_CALLBACK uninitializeDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num) { DownmixLocalData *data; STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); - if (data->psrcRoi) delete[] data->psrcRoi; + if (data->pSrcRoi) delete[] data->pSrcRoi; if (data->pSrcDesc) delete data->pSrcDesc; if (data->pDstDesc) delete data->pDstDesc; STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp index e54efc60a..7121a1b04 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp @@ -33,6 +33,8 @@ struct ToDecibelsLocalData { RpptDescPtr pSrcDesc; RpptDescPtr pDstDesc; RpptImagePatch *pSrcDims; + vxTensorLayout inputLayout; + vxTensorLayout outputLayout; size_t inputTensorDims[RPP_MAX_TENSOR_DIMS]; size_t outputTensorDims[RPP_MAX_TENSOR_DIMS]; }; @@ -71,6 +73,12 @@ static vx_status VX_CALLBACK validateToDecibels(vx_node node, const vx_reference STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); if (scalar_type != VX_TYPE_FLOAT32) return 
ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_INT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_INT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); // Check for input parameters size_t num_tensor_dims; @@ -118,10 +126,15 @@ static vx_status VX_CALLBACK initializeToDecibels(vx_node node, const vx_referen memset(data, 0, sizeof(ToDecibelsLocalData)); vx_enum input_tensor_datatype, output_tensor_datatype; + vx_int32 input_layout, output_layout; STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->cutOffDB)); STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->multiplier)); STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->referenceMagnitude)); - STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[6], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &input_layout)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &output_layout)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + data->inputLayout = static_cast(input_layout); + data->outputLayout = static_cast(output_layout); // Querying for input tensor data->pSrcDesc = new RpptDesc; @@ -185,7 +198,7 @@ vx_status ToDecibels_Register(vx_context context) { vx_kernel kernel = vxAddUserKernel(context, "org.rpp.ToDecibels", VX_KERNEL_RPP_TODECIBELS, processToDecibels, - 7, + 9, validateToDecibels, initializeToDecibels, uninitializeToDecibels); @@ -210,6 +223,8 @@ 
vx_status ToDecibels_Register(vx_context context) { PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); } if (status != VX_SUCCESS) { From 36fdc89a781d0e0bfd82d60a06393b642f441a83 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Wed, 15 May 2024 16:19:27 +0000 Subject: [PATCH 13/16] Removing if blocks for CPU device checks --- .../amd_rpp/source/tensor/Downmix.cpp | 19 ++++++++----------- .../amd_rpp/source/tensor/ToDecibels.cpp | 19 ++++++++----------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp index 6ca9c7ede..532f9f364 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp @@ -42,17 +42,14 @@ static vx_status VX_CALLBACK refreshDownmix(vx_node node, const vx_reference *pa return VX_ERROR_NOT_IMPLEMENTED; #endif } - if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { - void *roi_tensor_ptr_src; - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); - RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); - for 
(int n = 0; n < data->inputTensorDims[0]; n++) { - data->pSrcRoi[n * 2] = src_roi[n].xywhROI.roiWidth; - data->pSrcRoi[n * 2 + 1] = src_roi[n].xywhROI.roiHeight; - } - return status; + void *roi_tensor_ptr_src; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); + RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); + for (int n = 0; n < data->inputTensorDims[0]; n++) { + data->pSrcRoi[n * 2] = src_roi[n].xywhROI.roiWidth; + data->pSrcRoi[n * 2 + 1] = src_roi[n].xywhROI.roiHeight; } return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp index 7121a1b04..8c83e8e93 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp @@ -46,17 +46,14 @@ static vx_status VX_CALLBACK refreshToDecibels(vx_node node, const vx_reference return VX_ERROR_NOT_IMPLEMENTED; #endif } - if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { - void *roi_tensor_ptr_src; - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); - STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); - RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); - for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { - data->pSrcDims[i].width = src_roi[i].xywhROI.roiWidth; - data->pSrcDims[i].height = src_roi[i].xywhROI.roiHeight; - } - return status; + void 
*roi_tensor_ptr_src; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); + RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); + for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { + data->pSrcDims[i].width = src_roi[i].xywhROI.roiWidth; + data->pSrcDims[i].height = src_roi[i].xywhROI.roiHeight; } return status; } From ff3c50fcde52897b3ca7659d76ab20c63c7b86d7 Mon Sep 17 00:00:00 2001 From: fiona-gladwin Date: Mon, 20 May 2024 06:21:32 -0400 Subject: [PATCH 14/16] Add RPP_AUDIO flag for RPP audio API --- amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp | 4 ++++ amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp index 532f9f364..5ed6924c0 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp @@ -96,8 +96,12 @@ static vx_status VX_CALLBACK processDownmix(vx_node node, const vx_reference *pa } if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { refreshDownmix(node, parameters, num, data); +#if RPP_AUDIO rpp_status = rppt_down_mixing_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, (Rpp32s *)data->pSrcRoi, false, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; +#else + return_status = VX_ERROR_NOT_SUPPORTED; +#endif } return return_status; } diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp index 8c83e8e93..d09d08d43 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp @@ -111,8 +111,12 @@ static vx_status VX_CALLBACK processToDecibels(vx_node node, const vx_reference #endif } if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { +#if RPP_AUDIO rpp_status = rppt_to_decibels_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSrcDims, data->cutOffDB, data->multiplier, data->referenceMagnitude, data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#else + return_status = VX_ERROR_NOT_SUPPORTED; +#endif } return return_status; } From 0df3adbf362a82ceabe62ed848769612a7b0aaff Mon Sep 17 00:00:00 2001 From: Fiona-MCW <70996026+fiona-gladwin@users.noreply.github.com> Date: Wed, 29 May 2024 04:21:24 +0530 Subject: [PATCH 15/16] Audio PR - Augmentation support [ Spectrogram ] (#1355) * Bump rocm-docs-core[api_reference] from 0.34.0 to 0.34.2 in /docs/sphinx (#1286) Bumps [rocm-docs-core[api_reference]](https://github.com/RadeonOpenCompute/rocm-docs-core) from 0.34.0 to 0.34.2. - [Release notes](https://github.com/RadeonOpenCompute/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/RadeonOpenCompute/rocm-docs-core/compare/v0.34.0...v0.34.2) --- updated-dependencies: - dependency-name: rocm-docs-core[api_reference] dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump cryptography from 42.0.0 to 42.0.2 in /docs/sphinx (#1289) Bumps [cryptography](https://github.com/pyca/cryptography) from 42.0.0 to 42.0.2. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/42.0.0...42.0.2) --- updated-dependencies: - dependency-name: cryptography dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Add PreEmphasis filter support * Fix ROI - change from xy to width & height * Adding openvx changes for downmix node * Audio Augmentations 1 PR - NSR and Spectrogram * Revert "Adding openvx changes for downmix node" This reverts commit d53f81dd2fd385edd85a0a5efbe5c0f0a112fdc4. * PR comments resolution in the PreEmphasis Filter * Minor Changes * Change the borderType enum to int32 from uint32 dtype * Fix validation of preemphasis * Remove the memcopy of the src and dest rois as it can be handled in the rocAL - since the src and dst rois remain same * Formatting change - minor * Remove NSR * Minor formatting changes * Minor fix * Minor update - remove the 2nd instance of preemphasis filter * Enum dtype - change from uint to int * Remove roi_tensor_ptr_dst as it's unused after latest changes * Remove the dst_roi arg from vxExtRppPreemphasisFilter call as it's unused * Add MFB to MIVisionX * Revert "Add MFB to MIVisionX" This reverts commit dc4200bb82af9314dc0cffc89e68f369e45deab2. * Resolve the PR comments * Change the dims[0] and dims[1] positioning for Spectrogram and AudioFillDescPointers * Change function name to camelCase * Revert "Change the dims[0] and dims[1] positioning for Spectrogram and AudioFillDescPointers" This reverts commit 886d6af350b12696947994555dbfcaf4609088c7.
* Fix Spectrogram * Docs - update TOC for API Ref (#1327) * Bump rocm-docs-core[api_reference] from 0.38.0 to 0.38.1 in /docs/sphinx (#1328) Bumps [rocm-docs-core[api_reference]](https://github.com/RadeonOpenCompute/rocm-docs-core) from 0.38.0 to 0.38.1. - [Release notes](https://github.com/RadeonOpenCompute/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/RadeonOpenCompute/rocm-docs-core/compare/v0.38.0...v0.38.1) --- updated-dependencies: - dependency-name: rocm-docs-core[api_reference] dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Update CHANGELOG.md * Update CHANGELOG.md * Documents - Bump idna from 3.4 to 3.7 in /docs/sphinx (#1330) Bumps [idna](https://github.com/kjd/idna) from 3.4 to 3.7. - [Release notes](https://github.com/kjd/idna/releases) - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) - [Commits](https://github.com/kjd/idna/compare/v3.4...v3.7) --- updated-dependencies: - dependency-name: idna dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Update changelog * Resolve minor PR comments * Remove comments * Docs - Bump tqdm from 4.65.0 to 4.66.3 in /docs/sphinx (#1339) Bumps [tqdm](https://github.com/tqdm/tqdm) from 4.65.0 to 4.66.3. - [Release notes](https://github.com/tqdm/tqdm/releases) - [Commits](https://github.com/tqdm/tqdm/compare/v4.65.0...v4.66.3) --- updated-dependencies: - dependency-name: tqdm dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Docs - Bump jinja2 from 3.1.3 to 3.1.4 in /docs/sphinx (#1340) Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.3 to 3.1.4. 
- [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.3...3.1.4) --- updated-dependencies: - dependency-name: jinja2 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Find Half - Fix (#1341) * MIVisionX Setup - Updates (#1343) * SWDEV-459739 - Remove the package obsolete setting (#1345) The package was obsoleting itself and was causing upgrade issues. Removed the same. * Fix the layout issue with spec * Add layouts for Audio in vxTensorLayout Remove spectrogram layout param and pass layout in descriptor * Check the validity of pointers * Audio PR - Augmentation support [ Spectrogram ] (#1319) * Bump rocm-docs-core[api_reference] from 0.34.0 to 0.34.2 in /docs/sphinx (#1286) Bumps [rocm-docs-core[api_reference]](https://github.com/RadeonOpenCompute/rocm-docs-core) from 0.34.0 to 0.34.2. - [Release notes](https://github.com/RadeonOpenCompute/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/RadeonOpenCompute/rocm-docs-core/compare/v0.34.0...v0.34.2) --- updated-dependencies: - dependency-name: rocm-docs-core[api_reference] dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Bump cryptography from 42.0.0 to 42.0.2 in /docs/sphinx (#1289) Bumps [cryptography](https://github.com/pyca/cryptography) from 42.0.0 to 42.0.2. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/42.0.0...42.0.2) --- updated-dependencies: - dependency-name: cryptography dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Add PreEmphasis filter support * Fix ROI - change from xy to width & height * Adding openvx changes for downmix node * Audio Augmentations 1 PR - NSR and Spectrogram * Revert "Adding openvx changes for downmix node" This reverts commit d53f81dd2fd385edd85a0a5efbe5c0f0a112fdc4. * PR comments resolution in the PreEmphasis Filter * Minor Changes * Change the borderType enum to int32 from uint32 dtype * Fix validation of preemphasis * Remove the memcopy of the src and dest rois as it can be handled in the rocAL - since the src and dst rois remain same * Formatting change - minor * Remove NSR * Minor formatting changes * Minor fix * Minor update - remove the 2nd instance of preemphasis filter * Enum dtype - change from uint to int * Remove roi_tensor_ptr_dst as it's unused after latest changes * Remove the dst_roi arg from vxExtRppPreemphasisFilter call as it's unused * Add MFB to MIVisionX * Revert "Add MFB to MIVisionX" This reverts commit dc4200bb82af9314dc0cffc89e68f369e45deab2. * Resolve the PR comments * Change the dims[0] and dims[1] positioning for Spectrogram and AudioFillDescPointers * Change function name to camelCase * Revert "Change the dims[0] and dims[1] positioning for Spectrogram and AudioFillDescPointers" This reverts commit 886d6af350b12696947994555dbfcaf4609088c7. * Fix Spectrogram * Docs - update TOC for API Ref (#1327) * Bump rocm-docs-core[api_reference] from 0.38.0 to 0.38.1 in /docs/sphinx (#1328) Bumps [rocm-docs-core[api_reference]](https://github.com/RadeonOpenCompute/rocm-docs-core) from 0.38.0 to 0.38.1.
- [Release notes](https://github.com/RadeonOpenCompute/rocm-docs-core/releases) - [Changelog](https://github.com/ROCm/rocm-docs-core/blob/develop/CHANGELOG.md) - [Commits](https://github.com/RadeonOpenCompute/rocm-docs-core/compare/v0.38.0...v0.38.1) --- updated-dependencies: - dependency-name: rocm-docs-core[api_reference] dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Update CHANGELOG.md * Update CHANGELOG.md * Documents - Bump idna from 3.4 to 3.7 in /docs/sphinx (#1330) Bumps [idna](https://github.com/kjd/idna) from 3.4 to 3.7. - [Release notes](https://github.com/kjd/idna/releases) - [Changelog](https://github.com/kjd/idna/blob/master/HISTORY.rst) - [Commits](https://github.com/kjd/idna/compare/v3.4...v3.7) --- updated-dependencies: - dependency-name: idna dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> * Update changelog * Resolve minor PR comments * Remove comments * Fix the layout issue with spec * Check the validity of pointers --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: root Co-authored-by: Swetha B S Co-authored-by: SundarRajan28 Co-authored-by: Swetha B S Co-authored-by: randyh62 <42045079+randyh62@users.noreply.github.com> * Introduce API to obtain RPP layout * Add comments * Use RPP_AUDIO flag to disable RPP audio calls * Add Audio flag for PreEmphasis filter --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: root Co-authored-by: Swetha B S Co-authored-by: Swetha B S Co-authored-by: SundarRajan28 Co-authored-by: Swetha B S Co-authored-by: randyh62 <42045079+randyh62@users.noreply.github.com> Co-authored-by: swetha097 
<59434434+swetha097@users.noreply.github.com> Co-authored-by: Kiriti Gowda Co-authored-by: raramakr <91213141+raramakr@users.noreply.github.com> Co-authored-by: Sundar Rajan Vaithiyanathan <99159823+SundarRajan28@users.noreply.github.com> Co-authored-by: Lakshmi Kumar --- CHANGELOG.md | 1 + amd_openvx_extensions/amd_rpp/CMakeLists.txt | 1 + .../amd_rpp/include/internal_publishKernels.h | 16 +- .../amd_rpp/include/internal_rpp.h | 8 +- .../amd_rpp/include/kernels_rpp.h | 3 +- .../amd_rpp/include/vx_ext_rpp.h | 20 ++ .../source/internal_publishKernels.cpp | 1 + .../amd_rpp/source/kernel_rpp.cpp | 63 +++- .../source/tensor/PreemphasisFilter.cpp | 4 + .../amd_rpp/source/tensor/Spectrogram.cpp | 272 ++++++++++++++++++ 10 files changed, 375 insertions(+), 14 deletions(-) create mode 100644 amd_openvx_extensions/amd_rpp/source/tensor/Spectrogram.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 81d4c8226..f1b94b987 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ Documentation for MIVisionX is available at * Support for advanced GPUs * Support for PreEmphasis Filter augmentation in openVX extensions +* Support for Spectrogram augmentation in openVX extensions ### Optimizations diff --git a/amd_openvx_extensions/amd_rpp/CMakeLists.txt b/amd_openvx_extensions/amd_rpp/CMakeLists.txt index 6ebb147a9..bb091a4f1 100644 --- a/amd_openvx_extensions/amd_rpp/CMakeLists.txt +++ b/amd_openvx_extensions/amd_rpp/CMakeLists.txt @@ -157,6 +157,7 @@ list(APPEND SOURCES source/tensor/Saturation.cpp source/tensor/SequenceRearrange.cpp source/tensor/Snow.cpp + source/tensor/Spectrogram.cpp source/tensor/Vignette.cpp source/tensor/WarpAffine.cpp source/tensor/SequenceRearrange.cpp diff --git a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h index d96a927cc..9c63cb450 100644 --- a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h +++ 
b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h @@ -156,6 +156,7 @@ vx_status Snow_Register(vx_context); vx_status Vignette_Register(vx_context); vx_status WarpAffine_Register(vx_context); vx_status SequenceRearrange_Register(vx_context); +vx_status Spectrogram_Register(vx_context); // kernel names #define VX_KERNEL_RPP_NOPBATCHPD_NAME "org.rpp.NopbatchPD" @@ -274,12 +275,13 @@ vx_status SequenceRearrange_Register(vx_context); #define VX_KERNEL_RPP_PIXELATE_NAME "org.rpp.Pixelate" #define VX_KERNEL_RPP_VIGNETTE_NAME "org.rpp.Vignette" #define VX_KERNEL_RPP_WARPAFFINE_NAME "org.rpp.WarpAffine" -#define VX_KERNEL_RPP_BRIGHTNESS_NAME "org.rpp.Brightness" -#define VX_KERNEL_RPP_COPY_NAME "org.rpp.Copy" -#define VX_KERNEL_RPP_CROPMIRRORNORMALIZE_NAME "org.rpp.CropMirrorNormalize" -#define VX_KERNEL_RPP_NOP_NAME "org.rpp.Nop" -#define VX_KERNEL_RPP_RESIZE_NAME "org.rpp.Resize" -#define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME "org.rpp.SequenceRearrange" -#define VX_KERNEL_RPP_PREEMPHASISFILTER_NAME "org.rpp.PreemphasisFilter" +#define VX_KERNEL_RPP_BRIGHTNESS_NAME "org.rpp.Brightness" +#define VX_KERNEL_RPP_COPY_NAME "org.rpp.Copy" +#define VX_KERNEL_RPP_CROPMIRRORNORMALIZE_NAME "org.rpp.CropMirrorNormalize" +#define VX_KERNEL_RPP_NOP_NAME "org.rpp.Nop" +#define VX_KERNEL_RPP_RESIZE_NAME "org.rpp.Resize" +#define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME "org.rpp.SequenceRearrange" +#define VX_KERNEL_RPP_PREEMPHASISFILTER_NAME "org.rpp.PreemphasisFilter" +#define VX_KERNEL_RPP_SPECTROGRAM_NAME "org.rpp.Spectrogram" #endif //_AMDVX_EXT__PUBLISH_KERNELS_H_ diff --git a/amd_openvx_extensions/amd_rpp/include/internal_rpp.h b/amd_openvx_extensions/amd_rpp/include/internal_rpp.h index 61f5dbb29..ee9ea06c9 100644 --- a/amd_openvx_extensions/amd_rpp/include/internal_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/internal_rpp.h @@ -68,7 +68,10 @@ enum vxTensorLayout { VX_NHWC = 0, VX_NCHW = 1, VX_NFHWC = 2, - VX_NFCHW = 3 + VX_NFCHW = 3, + VX_NHW = 4, // Audio/2D 
layout + VX_NFT = 5, // Frequency major, Used for Spectrogram/MelFilterBank + VX_NTF = 6 // Time major, Used for Spectrogram/MelFilterBank }; //! Brief The utility functions @@ -76,8 +79,9 @@ vx_node createNode(vx_graph graph, vx_enum kernelEnum, vx_reference params[], vx vx_status createRPPHandle(vx_node node, vxRppHandle ** pHandle, Rpp32u batchSize, Rpp32u deviceType); vx_status releaseRPPHandle(vx_node node, vxRppHandle * handle, Rpp32u deviceType); void fillDescriptionPtrfromDims(RpptDescPtr &descPtr, vxTensorLayout layout, size_t *tensorDims); -void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *tensorDims); +void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *tensorDims, vxTensorLayout layout = vxTensorLayout::VX_NHW); RpptDataType getRpptDataType(vx_enum dataType); +RpptLayout getRpptLayout(vxTensorLayout layout); class Kernellist { diff --git a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h index e83d1ac0b..c34a45804 100644 --- a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h @@ -148,7 +148,8 @@ extern "C" VX_KERNEL_RPP_SNOW = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x71, VX_KERNEL_RPP_VIGNETTE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x72, VX_KERNEL_RPP_WARPAFFINE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x73, - VX_KERNEL_RPP_PREEMPHASISFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x74 + VX_KERNEL_RPP_PREEMPHASISFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x74, + VX_KERNEL_RPP_SPECTROGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75 }; #ifdef __cplusplus diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h index 618cd1a2f..a28891d1a 100644 --- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h @@ -1876,6 +1876,26 @@ extern "C" * \return A node 
reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus. */ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppPreemphasisFilter(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pPreemphCoeff, vx_scalar borderType); + + /*! \brief [Graph] Produces a spectrogram from a 1D signal. + * \ingroup group_amd_rpp + * \param [in] graph The handle to the graph. + * \param [in] pSrc The input tensor in \ref VX_TYPE_FLOAT32 format data. + * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input in xywh/ltrb format. + * \param [out] pDst The output tensor (begin) in \ref VX_TYPE_FLOAT32 format data. + * \param [in] pDstRoi The input tensor of batch size in unsigned int containing the roi values for the output tensor in xywh/ltrb format. + * \param [in] windowFunction The input array in \ref VX_TYPE_FLOAT32 format containing the samples of the window function that will be multiplied to each extracted window when calculating the STFT. + * \param [in] centerWindow The input scalar in \ref VX_TYPE_BOOL format indicates whether extracted windows should be padded so that the window function is centered at multiples of window_step. + * \param [in] reflectPadding The input scalar in \ref VX_TYPE_BOOL format indicates the padding policy when sampling outside the bounds of the signal. + * \param [in] spectrogramLayout The input scalar in \ref VX_TYPE_INT32 format containing the Output spectrogram layout. + * \param [in] power The input scalar in \ref VX_TYPE_INT32 format containing the exponent of the magnitude of the spectrum. + * \param [in] nfft The input scalar in \ref VX_TYPE_INT32 format containing the size of the FFT. + * \param [in] windowLength The input scalar in \ref VX_TYPE_INT32 format containing Window size in number of samples. 
+ * \param [in] windowStep The input scalar in \ref VX_TYPE_INT32 format containing the step between the STFT windows in number of samples. + * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus. + */ + SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_tensor pDstRoi, vx_array windowFunction, vx_scalar centerWindow, vx_scalar reflectPadding, vx_scalar spectrogramLayout, vx_scalar power, vx_scalar nfft, vx_scalar windowLength, vx_scalar windowStep); + #ifdef __cplusplus } #endif diff --git a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp index 3ee050805..f01a35d48 100644 --- a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp +++ b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp @@ -161,6 +161,7 @@ vx_status get_kernels_to_publish() STATUS_ERROR_CHECK(ADD_KERNEL(Snow_Register)); STATUS_ERROR_CHECK(ADD_KERNEL(Vignette_Register)); STATUS_ERROR_CHECK(ADD_KERNEL(WarpAffine_Register)); + STATUS_ERROR_CHECK(ADD_KERNEL(Spectrogram_Register)); return status; } diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp index c245a8e97..050bf1515 100644 --- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp +++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp @@ -2558,6 +2558,32 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppPreemphasisFilter(vx_graph graph, vx_te return node; } +VX_API_ENTRY vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_tensor pDstRoi, vx_array windowFunction, vx_scalar centerWindows, vx_scalar reflectPadding, vx_scalar spectrogramLayout, + vx_scalar power, vx_scalar nfft, vx_scalar windowLength, vx_scalar windowStep) { + vx_node node = NULL; + vx_context context =
vxGetContext((vx_reference)graph); + if (vxGetStatus((vx_reference)context) == VX_SUCCESS) { + vx_uint32 devtype = getGraphAffinity(graph); + vx_scalar deviceType = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &devtype); + vx_reference params[] = { + (vx_reference)pSrc, + (vx_reference)pSrcRoi, + (vx_reference)pDst, + (vx_reference)pDstRoi, + (vx_reference)windowFunction, + (vx_reference)centerWindows, + (vx_reference)reflectPadding, + (vx_reference)spectrogramLayout, + (vx_reference)power, + (vx_reference)nfft, + (vx_reference)windowLength, + (vx_reference)windowStep, + (vx_reference)deviceType}; + node = createNode(graph, VX_KERNEL_RPP_SPECTROGRAM, params, 13); + } + return node; +} + RpptDataType getRpptDataType(vx_enum vxDataType) { switch(vxDataType) { case vx_type_e::VX_TYPE_FLOAT32: @@ -2571,6 +2597,34 @@ RpptDataType getRpptDataType(vx_enum vxDataType) { } } +RpptLayout getRpptLayout(vxTensorLayout layout) { + switch(layout) { + case vxTensorLayout::VX_NHWC: + return RpptLayout::NHWC; + case vxTensorLayout::VX_NCHW: + return RpptLayout::NCHW; + case vxTensorLayout::VX_NFHWC: + return RpptLayout::NHWC; + case vxTensorLayout::VX_NFCHW: + return RpptLayout::NCHW; +#if RPP_AUDIO + case vxTensorLayout::VX_NHW: + return RpptLayout::NHW; + case vxTensorLayout::VX_NFT: + return RpptLayout::NFT; + case vxTensorLayout::VX_NTF: + return RpptLayout::NTF; +#else + case vxTensorLayout::VX_NHW: + case vxTensorLayout::VX_NFT: + case vxTensorLayout::VX_NTF: + throw std::runtime_error("RPP_AUDIO flag disabled, Audio layouts are not supported"); +#endif + default: + throw std::runtime_error("Invalid layout"); + } +} + void fillDescriptionPtrfromDims(RpptDescPtr &descPtr, vxTensorLayout layout, size_t *tensorDims) { switch(layout) { case vxTensorLayout::VX_NHWC: { @@ -2627,16 +2681,17 @@ void fillDescriptionPtrfromDims(RpptDescPtr &descPtr, vxTensorLayout layout, siz } } -void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *tensorDims) { - 
descPtr->n = tensorDims[0]; - descPtr->h = tensorDims[2]; - descPtr->w = tensorDims[1]; +void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *maxTensorDims, vxTensorLayout layout) { + descPtr->n = maxTensorDims[0]; + descPtr->h = maxTensorDims[1]; + descPtr->w = maxTensorDims[2]; descPtr->c = 1; descPtr->strides.nStride = descPtr->c * descPtr->w * descPtr->h; descPtr->strides.hStride = descPtr->c * descPtr->w; descPtr->strides.wStride = descPtr->c; descPtr->strides.cStride = 1; descPtr->numDims = 4; + descPtr->layout = getRpptLayout(layout); } // utility functions diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/PreemphasisFilter.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/PreemphasisFilter.cpp index abf2780d8..416d919b1 100644 --- a/amd_openvx_extensions/amd_rpp/source/tensor/PreemphasisFilter.cpp +++ b/amd_openvx_extensions/amd_rpp/source/tensor/PreemphasisFilter.cpp @@ -98,8 +98,12 @@ static vx_status VX_CALLBACK processPreemphasisFilter(vx_node node, const vx_ref #endif } if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { +#if RPP_AUDIO rpp_status = rppt_pre_emphasis_filter_host((float *)data->pSrc, data->pSrcDesc, (float *)data->pDst, data->pDstDesc, (Rpp32s *)data->pSampleSize, data->pPreemphCoeff, RpptAudioBorderType(data->borderType), data->handle->rppHandle); return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#else + return_status = VX_ERROR_NOT_SUPPORTED; +#endif } return return_status; } diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Spectrogram.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Spectrogram.cpp new file mode 100644 index 000000000..610f2bcfb --- /dev/null +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Spectrogram.cpp @@ -0,0 +1,272 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "internal_publishKernels.h" + +struct SpectrogramLocalData { + vxRppHandle *handle; + Rpp32u deviceType; + RppPtr_t pSrc; + RppPtr_t pDst; + bool centerWindows; + bool reflectPadding; + vxTensorLayout spectrogramLayout; + Rpp32s power; + Rpp32s nfft; + Rpp32s windowLength; + Rpp32s windowOffset; + Rpp32s windowStep; + RpptDescPtr pSrcDesc; + RpptDescPtr pDstDesc; + Rpp32s *pSrcLength; + Rpp32f *pWindowFn; + size_t inputTensorDims[RPP_MAX_TENSOR_DIMS]; + size_t outputTensorDims[RPP_MAX_TENSOR_DIMS]; +}; + +void updateDstRoi(SpectrogramLocalData *data, RpptROI *src_roi, RpptROI *dst_roi) { + const Rpp32s num_frames = ((data->nfft / 2) + 1); + for (unsigned i = 0; i < data->inputTensorDims[0]; i++) { + data->pSrcLength[i] = static_cast(src_roi[i].xywhROI.roiWidth); + if (data->spectrogramLayout == vxTensorLayout::VX_NTF) { + dst_roi[i].xywhROI.roiWidth = ((data->pSrcLength[i] - data->windowOffset) / data->windowStep) + 1; + dst_roi[i].xywhROI.roiHeight = num_frames; + } else if (data->spectrogramLayout == vxTensorLayout::VX_NFT) { + dst_roi[i].xywhROI.roiWidth = num_frames; + dst_roi[i].xywhROI.roiHeight = ((data->pSrcLength[i] - data->windowOffset) / data->windowStep) + 1; + } + } +} + +static vx_status VX_CALLBACK refreshSpectrogram(vx_node node, const vx_reference *parameters, SpectrogramLocalData *data) { + vx_status status = VX_SUCCESS; + vx_status return_status = VX_SUCCESS; + void *roi_tensor_ptr_src, *roi_tensor_ptr_dst; + if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_OPENCL || ENABLE_HIP + return_status = VX_ERROR_NOT_IMPLEMENTED; +#endif + } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], 
VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[3], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_dst, sizeof(roi_tensor_ptr_dst))); + } + updateDstRoi(data, reinterpret_cast(roi_tensor_ptr_src), reinterpret_cast(roi_tensor_ptr_dst)); + return status; +} + +static vx_status VX_CALLBACK validateSpectrogram(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[5], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_BOOL) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #5 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[6], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_BOOL) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #6 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[7], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_INT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #7 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[8], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_INT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #8 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[9], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_INT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #9 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[10], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_INT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #10 
type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[11], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_INT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #11 type=%d (must be size)\n", scalar_type); + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[12], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #12 type=%d (must be size)\n", scalar_type); + + // Check for input parameters + size_t num_tensor_dims; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); + if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Spectrogram: tensor: #0 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims); + + // Check for output parameters + vx_uint8 tensor_fixed_point_position; + size_t tensor_dims[RPP_MAX_TENSOR_DIMS]; + vx_enum tensor_datatype; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); + if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Spectrogram: tensor: #2 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, 
sizeof(tensor_dims))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position))); + return status; +} + +static vx_status VX_CALLBACK processSpectrogram(vx_node node, const vx_reference *parameters, vx_uint32 num) { + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + SpectrogramLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + refreshSpectrogram(node, parameters, data); + if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_OPENCL || ENABLE_HIP + return_status = VX_ERROR_NOT_IMPLEMENTED; +#endif + } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) { +#if RPP_AUDIO + rpp_status = rppt_spectrogram_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSrcLength, data->centerWindows, data->reflectPadding, + data->pWindowFn, data->nfft, data->power, data->windowLength, data->windowStep, data->handle->rppHandle); + return_status = (rpp_status == RPP_SUCCESS) ? 
VX_SUCCESS : VX_FAILURE; +#else + return_status = VX_ERROR_NOT_SUPPORTED; +#endif + } + return return_status; +} + +static vx_status VX_CALLBACK initializeSpectrogram(vx_node node, const vx_reference *parameters, vx_uint32 num) { + SpectrogramLocalData *data = new SpectrogramLocalData; + if (data) { + memset(data, 0, sizeof(SpectrogramLocalData)); + + vx_enum input_tensor_datatype, output_tensor_datatype; + int spectrogram_layout; + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->centerWindows)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &data->reflectPadding)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &spectrogram_layout)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[8], &data->power)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[9], &data->nfft)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[10], &data->windowLength)); + STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[11], &data->windowStep)); + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[12], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + data->spectrogramLayout = static_cast(spectrogram_layout); + data->windowOffset = (!data->centerWindows) ? 
data->windowLength : 0; + + // Querying for input tensor + data->pSrcDesc = new RpptDesc; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims)); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_datatype, sizeof(input_tensor_datatype))); + data->pSrcDesc->dataType = getRpptDataType(input_tensor_datatype); + data->pSrcDesc->offsetInBytes = 0; + fillAudioDescriptionPtrFromDims(data->pSrcDesc, data->inputTensorDims); + + // Querying for output tensor + data->pDstDesc = new RpptDesc; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims)); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_datatype, sizeof(output_tensor_datatype))); + data->pDstDesc->dataType = getRpptDataType(output_tensor_datatype); + data->pDstDesc->offsetInBytes = 0; + fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims, data->spectrogramLayout); + + data->pSrcLength = new int[data->pSrcDesc->n]; + data->pWindowFn = new float[data->windowLength]; + + STATUS_ERROR_CHECK(vxCopyArrayRange((vx_array)parameters[4], 0, data->windowLength, sizeof(float), data->pWindowFn, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; + } else { + return VX_FAILURE;; + } +} + +static vx_status VX_CALLBACK uninitializeSpectrogram(vx_node node, const 
vx_reference *parameters, vx_uint32 num) { + SpectrogramLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + if (data->pSrcLength) delete[] data->pSrcLength; + if (data->pWindowFn) delete[] data->pWindowFn; + if (data->pSrcDesc) delete data->pSrcDesc; + if (data->pDstDesc) delete data->pDstDesc; + STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); + delete data; + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) { + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + + return VX_SUCCESS; +} + +vx_status Spectrogram_Register(vx_context context) { + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Spectrogram", + VX_KERNEL_RPP_SPECTROGRAM, + processSpectrogram, + 13, + validateSpectrogram, + initializeSpectrogram, + uninitializeSpectrogram); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_HIP + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + 
STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); +#else + vx_bool enableBufferAccess = vx_false_e; +#endif + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + + if (kernel) { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_ARRAY, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 9, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 10, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 11, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 12, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); + PARAM_ERROR_CHECK(vxFinalizeKernel(kernel)); + } + if (status != VX_SUCCESS) { + exit: + 
vxRemoveKernel(kernel); + return VX_FAILURE; + } + + return status; +} From 832d9ef9ca10c9aaaafcda58f88b708c1d7eb882 Mon Sep 17 00:00:00 2001 From: SundarRajan28 Date: Thu, 30 May 2024 11:13:01 +0000 Subject: [PATCH 16/16] Rename map variable --- amd_openvx_extensions/amd_rpp/include/internal_rpp.h | 2 +- amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/amd_openvx_extensions/amd_rpp/include/internal_rpp.h b/amd_openvx_extensions/amd_rpp/include/internal_rpp.h index 2c5190de6..c80383948 100644 --- a/amd_openvx_extensions/amd_rpp/include/internal_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/internal_rpp.h @@ -75,7 +75,7 @@ enum vxTensorLayout { VX_NTF = 6 // Time major, Used for Spectrogram/MelFilterBank }; -const std::map TENSOR_LAYOUT_MAPPING = { +const std::map tensorLayoutMapping = { {vxTensorLayout::VX_NHWC, RpptLayout::NHWC}, {vxTensorLayout::VX_NCHW, RpptLayout::NCHW}, {vxTensorLayout::VX_NFHWC, RpptLayout::NHWC}, diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp index b242e7de0..99607e2c5 100644 --- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp +++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp @@ -2700,8 +2700,8 @@ void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *maxTensorDims descPtr->strides.wStride = descPtr->c; descPtr->strides.cStride = 1; descPtr->numDims = 4; - if(TENSOR_LAYOUT_MAPPING.find(layout) != TENSOR_LAYOUT_MAPPING.end()) { - descPtr->layout = TENSOR_LAYOUT_MAPPING.at(layout); + if(tensorLayoutMapping.find(layout) != tensorLayoutMapping.end()) { + descPtr->layout = tensorLayoutMapping.at(layout); } else { throw std::runtime_error("Invalid layout"); }