Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DownMix Node and to_decibels augmentations #32

Open
wants to merge 37 commits into
base: swbs_m2/audio/pr5
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
ba6ff14
Merging swbs_m2/audio/pr5 into swbs_m3/audio/pr6
swetha097 Mar 14, 2024
e94c53f
Fixing build issues
SundarRajan28 Mar 15, 2024
4495d5c
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
SundarRajan28 Mar 22, 2024
247cf70
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
SundarRajan28 Mar 22, 2024
1a51fee
Add openvx extensions for to_decibels augmentations
SundarRajan28 Mar 22, 2024
e8ff20e
Merge branch 'swbs_m1/audio/pr3' into swbs_m3/audio/pr6
SundarRajan28 Mar 25, 2024
a649fba
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
SundarRajan28 Mar 25, 2024
01aba8a
Fixing issues with src ROI
SundarRajan28 Mar 25, 2024
a25b645
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
SundarRajan28 Mar 25, 2024
68a5d7e
Resolving review comments
SundarRajan28 Mar 25, 2024
ebe8933
Resolving review comments
SundarRajan28 Mar 25, 2024
a446bff
Removing dstROI from to_decibels openvx augmentation
SundarRajan28 Mar 25, 2024
ce4778f
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
SundarRajan28 Mar 25, 2024
070bd82
Resolving review comments
SundarRajan28 Mar 25, 2024
658504f
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
SundarRajan28 Mar 26, 2024
a2c1063
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
SundarRajan28 Mar 26, 2024
b08b602
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
SundarRajan28 Apr 16, 2024
3be53bf
Update changelog
SundarRajan28 Apr 16, 2024
c69afd8
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
SundarRajan28 Apr 17, 2024
58d8c0d
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
SundarRajan28 Apr 17, 2024
4121a9c
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
swetha097 May 8, 2024
d7db0cb
Merge remote-tracking branch 'upstream/develop' into swbs_m3/audio/pr6
SundarRajan28 May 10, 2024
81c5533
Resolving review comments
SundarRajan28 May 13, 2024
209b83a
Merge remote-tracking branch 'swbs_m2/audio/pr5_layout' into swbs_m3/…
SundarRajan28 May 13, 2024
e0883dd
Resolving review comments
SundarRajan28 May 14, 2024
a849125
Minor changes
SundarRajan28 May 15, 2024
36fdc89
Removing if blocks for CPU device checks
SundarRajan28 May 15, 2024
99eeb10
Merge remote-tracking branch 'upstream/develop' into swbs_m3/audio/pr6
SundarRajan28 May 17, 2024
fefc759
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
fiona-gladwin May 20, 2024
ff3c50f
Add RPP_AUDIO flag for RPP audio API
fiona-gladwin May 20, 2024
3f8d1c8
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
fiona-gladwin May 20, 2024
f1cb901
Merge remote-tracking branch 'upstream/develop' into swbs_m3/audio/pr6
SundarRajan28 May 22, 2024
f601ad9
Merge branch 'develop' into swbs_m3/audio/pr6
SundarRajan28 May 24, 2024
0df3adb
Audio PR - Augmentation support [ Spectrogram ] (#1355)
fiona-gladwin May 28, 2024
b1c7694
Merge branch 'swbs_m2/audio/pr5' into swbs_m3/audio/pr6
SundarRajan28 May 29, 2024
f4dd4e3
Merge remote-tracking branch 'upstream/develop' into swbs_m3/audio/pr6
SundarRajan28 May 29, 2024
832d9ef
Rename map variable
SundarRajan28 May 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Documentation for MIVisionX is available at
* Support for advanced GPUs
* Support for PreEmphasis Filter augmentation in openVX extensions
* Support for Spectrogram augmentation in openVX extensions
* Support for Downmix and ToDecibels augmentations in openVX extensions

### Optimizations

Expand Down
2 changes: 2 additions & 0 deletions amd_openvx_extensions/amd_rpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ list(APPEND SOURCES
source/tensor/Copy.cpp
source/tensor/Crop.cpp
source/tensor/CropMirrorNormalize.cpp
source/tensor/Downmix.cpp
source/tensor/Exposure.cpp
source/tensor/FishEye.cpp
source/tensor/Flip.cpp
Expand All @@ -158,6 +159,7 @@ list(APPEND SOURCES
source/tensor/SequenceRearrange.cpp
source/tensor/Snow.cpp
source/tensor/Spectrogram.cpp
source/tensor/ToDecibels.cpp
source/tensor/Vignette.cpp
source/tensor/WarpAffine.cpp
source/tensor/SequenceRearrange.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ vx_status Vignette_Register(vx_context);
vx_status WarpAffine_Register(vx_context);
vx_status SequenceRearrange_Register(vx_context);
vx_status Spectrogram_Register(vx_context);
vx_status Downmix_Register(vx_context);
vx_status ToDecibels_Register(vx_context);

// kernel names
#define VX_KERNEL_RPP_NOPBATCHPD_NAME "org.rpp.NopbatchPD"
Expand Down Expand Up @@ -283,5 +285,7 @@ vx_status Spectrogram_Register(vx_context);
#define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME "org.rpp.SequenceRearrange"
SundarRajan28 marked this conversation as resolved.
Show resolved Hide resolved
#define VX_KERNEL_RPP_PREEMPHASISFILTER_NAME "org.rpp.PreemphasisFilter"
#define VX_KERNEL_RPP_SPECTROGRAM_NAME "org.rpp.Spectrogram"
#define VX_KERNEL_RPP_DOWNMIX_NAME "org.rpp.Downmix"
#define VX_KERNEL_RPP_TODECIBELS_NAME "org.rpp.ToDecibels"

#endif //_AMDVX_EXT__PUBLISH_KERNELS_H_
14 changes: 13 additions & 1 deletion amd_openvx_extensions/amd_rpp/include/internal_rpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ THE SOFTWARE.
#include<iostream>
#include<algorithm>
#include<functional>
#include<map>

using namespace std;

Expand Down Expand Up @@ -74,14 +75,25 @@ enum vxTensorLayout {
VX_NTF = 6 // Time major, Used for Spectrogram/MelFilterBank
};

// Lookup table from the extension's vxTensorLayout values to the RPP RpptLayout values.
// VX_NFHWC / VX_NFCHW map to the same RPP layouts as VX_NHWC / VX_NCHW.
// Layouts without an entry are absent from the map, so callers should check with find()
// (or be prepared for at() to throw) before using the result.
const std::map<vxTensorLayout, RpptLayout> tensorLayoutMapping = {
{vxTensorLayout::VX_NHWC, RpptLayout::NHWC},
{vxTensorLayout::VX_NCHW, RpptLayout::NCHW},
{vxTensorLayout::VX_NFHWC, RpptLayout::NHWC},
{vxTensorLayout::VX_NFCHW, RpptLayout::NCHW},
// The audio layouts are only registered when the RPP_AUDIO flag is enabled.
#if RPP_AUDIO
{vxTensorLayout::VX_NHW, RpptLayout::NHW},
{vxTensorLayout::VX_NFT, RpptLayout::NFT},
{vxTensorLayout::VX_NTF, RpptLayout::NTF}
#endif
};

//! \brief Utility functions shared by the RPP OpenVX extension kernels.
// createNode: called by the vxExtRpp* entry points with the kernel enum, the parameter array and its length; returns the node.
vx_node createNode(vx_graph graph, vx_enum kernelEnum, vx_reference params[], vx_uint32 num);
// createRPPHandle / releaseRPPHandle: presumably create and destroy the per-node RPP handle — definitions not visible here, confirm.
vx_status createRPPHandle(vx_node node, vxRppHandle ** pHandle, Rpp32u batchSize, Rpp32u deviceType);
vx_status releaseRPPHandle(vx_node node, vxRppHandle * handle, Rpp32u deviceType);
// fillDescriptionPtrfromDims: fills descPtr according to the given layout (the definition switches on layout).
void fillDescriptionPtrfromDims(RpptDescPtr &descPtr, vxTensorLayout layout, size_t *tensorDims);
// fillAudioDescriptionPtrFromDims: fills descPtr for audio tensors, including strides, numDims and layout; layout defaults to VX_NHW.
void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *tensorDims, vxTensorLayout layout = vxTensorLayout::VX_NHW);
// getRpptDataType: translates an OpenVX data type enum into the corresponding RpptDataType.
RpptDataType getRpptDataType(vx_enum dataType);
// getRpptLayout: translates a vxTensorLayout into the corresponding RpptLayout.
RpptLayout getRpptLayout(vxTensorLayout layout);

class Kernellist
{
Expand Down
4 changes: 3 additions & 1 deletion amd_openvx_extensions/amd_rpp/include/kernels_rpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,9 @@ extern "C"
VX_KERNEL_RPP_VIGNETTE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x72,
VX_KERNEL_RPP_WARPAFFINE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x73,
VX_KERNEL_RPP_PREEMPHASISFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x74,
VX_KERNEL_RPP_SPECTROGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75
VX_KERNEL_RPP_SPECTROGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75,
VX_KERNEL_RPP_DOWNMIX = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x76,
VX_KERNEL_RPP_TODECIBELS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x77
};

#ifdef __cplusplus
Expand Down
25 changes: 25 additions & 0 deletions amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1896,6 +1896,31 @@ extern "C"
*/
SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_tensor pDstRoi, vx_array windowFunction, vx_scalar centerWindow, vx_scalar reflectPadding, vx_scalar spectrogramLayout, vx_scalar power, vx_scalar nfft, vx_scalar windowLength, vx_scalar windowStep);

/*! \brief [Graph] Applies downmixing to the input tensor.
 * \ingroup group_amd_rpp
 * \param [in] graph The handle to the graph.
 * \param [in] pSrc The input tensor in <tt>\ref VX_TYPE_FLOAT32</tt> format data.
 * \param [out] pDst The output tensor in <tt>\ref VX_TYPE_FLOAT32</tt> format data.
 * \param [in] pSrcRoi The input tensor of batch size in <tt>unsigned int</tt> containing the roi values for the input.
 * \return A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
 */
SHARED_PUBLIC vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor pSrcRoi);
SundarRajan28 marked this conversation as resolved.
Show resolved Hide resolved

/*! \brief [Graph] Applies to_decibels augmentation to the input tensor.
* \ingroup group_amd_rpp
* \param [in] graph The handle to the graph.
* \param [in] pSrc The input tensor in <tt>\ref VX_TYPE_FLOAT32</tt> format data.
* \param [in] pSrcRoi The input tensor of batch size in <tt>unsigned int</tt> containing the roi values for the input.
* \param [out] pDst The output tensor in <tt>\ref VX_TYPE_FLOAT32</tt> format data.
* \param[in] cutOffDB The input scalar in <tt>\ref VX_TYPE_FLOAT32</tt> format containing minimum or cut-off ratio in dB
Copy link
Collaborator

@fiona-gladwin fiona-gladwin Mar 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it dims or ROI, please change name accordingly

For both srcDims and dstDims

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also is it of xywh or ltrb format? For audio

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change the supported data types

* \param[in] multiplier The input scalar in <tt>\ref VX_TYPE_FLOAT32</tt> format containing factor by which the logarithm is multiplied
* \param [in] referenceMagnitude The input scalar in <tt>\ref VX_TYPE_FLOAT32</tt> format containing the reference magnitude; if it is not provided, the maximum value of the input is used as the reference.
* \param [in] inputLayout The input layout in <tt>\ref VX_TYPE_INT32</tt> denotes the layout of input tensor.
* \param [in] outputLayout The output layout in <tt>\ref VX_TYPE_INT32</tt> denotes the layout of output tensor.
* \return A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
*/
SHARED_PUBLIC vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude, vx_scalar inputLayout, vx_scalar outputLayout);

#ifdef __cplusplus
}
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ vx_status get_kernels_to_publish()
STATUS_ERROR_CHECK(ADD_KERNEL(Vignette_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(WarpAffine_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(Spectrogram_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(Downmix_Register));
STATUS_ERROR_CHECK(ADD_KERNEL(ToDecibels_Register));

return status;
}
Expand Down
71 changes: 42 additions & 29 deletions amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2584,6 +2584,43 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor p
return node;
}

// Builds a Downmix node on the given graph.
// Returns NULL when the graph's context is not in a VX_SUCCESS state; otherwise returns
// whatever createNode() yields for the VX_KERNEL_RPP_DOWNMIX kernel.
VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor pSrcRoi) {
    vx_context context = vxGetContext((vx_reference)graph);
    if (vxGetStatus((vx_reference)context) != VX_SUCCESS) {
        return NULL;
    }

    // The graph affinity is forwarded to the kernel as the trailing scalar parameter.
    vx_uint32 affinity = getGraphAffinity(graph);
    vx_scalar affinityScalar = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &affinity);

    // Parameter order must match the kernel's registered parameter indices.
    vx_reference nodeArgs[] = {
        (vx_reference)pSrc,
        (vx_reference)pDst,
        (vx_reference)pSrcRoi,
        (vx_reference)affinityScalar};
    const vx_uint32 argCount = sizeof(nodeArgs) / sizeof(nodeArgs[0]);  // 4 parameters
    return createNode(graph, VX_KERNEL_RPP_DOWNMIX, nodeArgs, argCount);
}

// Builds a ToDecibels node on the given graph.
// Returns NULL when the graph's context is not in a VX_SUCCESS state; otherwise returns
// whatever createNode() yields for the VX_KERNEL_RPP_TODECIBELS kernel.
VX_API_ENTRY vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude, vx_scalar inputLayout, vx_scalar outputLayout) {
    vx_context context = vxGetContext((vx_reference)graph);
    if (vxGetStatus((vx_reference)context) != VX_SUCCESS) {
        return NULL;
    }

    // The graph affinity is forwarded to the kernel as the trailing scalar parameter.
    vx_uint32 affinity = getGraphAffinity(graph);
    vx_scalar affinityScalar = vxCreateScalar(vxGetContext((vx_reference)graph), VX_TYPE_UINT32, &affinity);

    // Parameter order must match the kernel's registered parameter indices.
    vx_reference nodeArgs[] = {
        (vx_reference)pSrc,
        (vx_reference)pSrcRoi,
        (vx_reference)pDst,
        (vx_reference)cutOffDB,
        (vx_reference)multiplier,
        (vx_reference)referenceMagnitude,
        (vx_reference)inputLayout,
        (vx_reference)outputLayout,
        (vx_reference)affinityScalar};
    const vx_uint32 argCount = sizeof(nodeArgs) / sizeof(nodeArgs[0]);  // 9 parameters
    return createNode(graph, VX_KERNEL_RPP_TODECIBELS, nodeArgs, argCount);
}

RpptDataType getRpptDataType(vx_enum vxDataType) {
switch(vxDataType) {
case vx_type_e::VX_TYPE_FLOAT32:
Expand All @@ -2597,34 +2634,6 @@ RpptDataType getRpptDataType(vx_enum vxDataType) {
}
}

// Translates an extension tensor layout into the matching RPP layout.
// VX_NFHWC / VX_NFCHW translate to the same RPP layouts as VX_NHWC / VX_NCHW.
// Throws std::runtime_error for audio layouts when RPP_AUDIO is disabled, and for any
// layout value that has no translation.
RpptLayout getRpptLayout(vxTensorLayout layout) {
    if (layout == vxTensorLayout::VX_NHWC || layout == vxTensorLayout::VX_NFHWC) {
        return RpptLayout::NHWC;
    }
    if (layout == vxTensorLayout::VX_NCHW || layout == vxTensorLayout::VX_NFCHW) {
        return RpptLayout::NCHW;
    }
#if RPP_AUDIO
    if (layout == vxTensorLayout::VX_NHW) {
        return RpptLayout::NHW;
    }
    if (layout == vxTensorLayout::VX_NFT) {
        return RpptLayout::NFT;
    }
    if (layout == vxTensorLayout::VX_NTF) {
        return RpptLayout::NTF;
    }
#else
    const bool isAudioLayout = layout == vxTensorLayout::VX_NHW ||
                               layout == vxTensorLayout::VX_NFT ||
                               layout == vxTensorLayout::VX_NTF;
    if (isAudioLayout) {
        throw std::runtime_error("RPP_AUDIO flag disabled, Audio layouts are not supported");
    }
#endif
    throw std::runtime_error("Invalid layout");
}

void fillDescriptionPtrfromDims(RpptDescPtr &descPtr, vxTensorLayout layout, size_t *tensorDims) {
switch(layout) {
case vxTensorLayout::VX_NHWC: {
Expand Down Expand Up @@ -2691,7 +2700,11 @@ void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *maxTensorDims
descPtr->strides.wStride = descPtr->c;
descPtr->strides.cStride = 1;
descPtr->numDims = 4;
descPtr->layout = getRpptLayout(layout);
if(tensorLayoutMapping.find(layout) != tensorLayoutMapping.end()) {
descPtr->layout = tensorLayoutMapping.at(layout);
} else {
throw std::runtime_error("Invalid layout");
}
}

// utility functions
Expand Down
Loading